diff options
Diffstat (limited to 'src/gallium/drivers/r600')
23 files changed, 527 insertions, 315 deletions
diff --git a/src/gallium/drivers/r600/eg_hw_states.c b/src/gallium/drivers/r600/eg_hw_states.c index d6f417e1e3..3d10095919 100644 --- a/src/gallium/drivers/r600/eg_hw_states.c +++ b/src/gallium/drivers/r600/eg_hw_states.c @@ -422,11 +422,11 @@ static void eg_dsa(struct r600_context *rctx, struct radeon_state *rstate) S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); - query_running = false; + query_running = FALSE; LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) { if (rquery->state & R600_QUERY_STATE_STARTED) { - query_running = true; + query_running = TRUE; } } @@ -471,6 +471,7 @@ static void eg_sampler_border(struct r600_context *rctx, struct radeon_state *rs radeon_state_init(rstate, rscreen->rw, R600_STATE_SAMPLER_BORDER, id, R600_SHADER_PS); if (uc.ui) { + rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_INDEX] = id; rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED] = fui(state->border_color[0]); rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN] = fui(state->border_color[1]); rstate->states[EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE] = fui(state->border_color[2]); @@ -559,8 +560,7 @@ static void eg_resource(struct pipe_context *ctx, struct radeon_state *rstate, rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; rstate->placement[3] = RADEON_GEM_DOMAIN_GTT; - pitch = (tmp->pitch[0] / tmp->bpt); - pitch = (pitch + 0x7) & ~0x7; + pitch = align(tmp->pitch[0] / tmp->bpt, 8); /* FIXME properly handle first level != 0 */ rstate->states[EG_PS_RESOURCE__RESOURCE0_WORD0] = @@ -930,7 +930,7 @@ static int eg_ps_shader(struct r600_context *rctx, struct r600_context_state *rp const struct pipe_rasterizer_state *rasterizer; struct r600_shader *rshader = &rpshader->shader; unsigned i, tmp, exports_ps, num_cout; - boolean have_pos = FALSE; + boolean have_pos = FALSE, have_face = FALSE; rasterizer = &rctx->rasterizer->state.rasterizer; @@ -945,6 +945,10 @@ static int eg_ps_shader(struct r600_context *rctx, struct r600_context_state *rp rshader->input[i].name == TGSI_SEMANTIC_POSITION) { tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); } + + if (rshader->input[i].name == TGSI_SEMANTIC_FACE) + have_face = TRUE; + if (rasterizer->sprite_coord_enable & (1 << i)) { tmp |= S_028644_PT_SPRITE_TEX(1); } @@ -957,10 +961,10 @@ static int eg_ps_shader(struct r600_context *rctx, struct r600_context_state *rp if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) exports_ps |= 1; else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { - exports_ps |= (1 << (num_cout+1)); num_cout++; } } + exports_ps |= (1 << num_cout); if (!exports_ps) { /* always at least export 1 component per pixel */ exports_ps = 2; @@ -971,7 +975,10 @@ static int eg_ps_shader(struct r600_context *rctx, struct r600_context_state *rp state->states[EG_PS_SHADER__SPI_PS_IN_CONTROL_0] |= S_0286CC_POSITION_ENA(1); state->states[EG_PS_SHADER__SPI_INPUT_Z] |= 1; } + state->states[EG_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; + state->states[EG_PS_SHADER__SPI_PS_IN_CONTROL_1] |= S_0286D0_FRONT_FACE_ENA(have_face); + state->states[EG_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028844_NUM_GPRS(rshader->bc.ngpr) | S_028844_PRIME_CACHE_ON_DRAW(1) | S_028844_STACK_SIZE(rshader->bc.nstack); state->states[EG_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps; diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h index 4e3514638b..0a42abcdf2 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -283,6 +283,9 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_B4G4R4A4_UNORM: case PIPE_FORMAT_B4G4R4X4_UNORM: return V_028C70_SWAP_ALT; + + case PIPE_FORMAT_Z16_UNORM: + return V_028C70_SWAP_STD; /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: @@ -310,6 +313,10 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return V_028C70_SWAP_STD; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return V_028C70_SWAP_STD; + case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_R10G10B10X2_SNORM: case PIPE_FORMAT_B10G10R10A2_UNORM: @@ -357,6 +364,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_B4G4R4X4_UNORM: return V_028C70_COLOR_4_4_4_4; + case PIPE_FORMAT_Z16_UNORM: + return V_028C70_COLOR_16; + /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: case PIPE_FORMAT_A8B8G8R8_UNORM: @@ -383,6 +393,10 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return V_028C70_COLOR_8_24; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return V_028C70_COLOR_24_8; + case PIPE_FORMAT_R32_FLOAT: return V_028C70_COLOR_32_FLOAT; diff --git a/src/gallium/drivers/r600/eg_states_inc.h b/src/gallium/drivers/r600/eg_states_inc.h index 462f31cc79..9f8007c8e9 100644 --- a/src/gallium/drivers/r600/eg_states_inc.h +++ b/src/gallium/drivers/r600/eg_states_inc.h @@ -368,27 +368,30 @@ #define EG_GS_SAMPLER_PM4 128 /* EG_PS_SAMPLER_BORDER */ -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED 0 -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN 1 -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE 2 -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_ALPHA 3 -#define EG_PS_SAMPLER_BORDER_SIZE 4 +#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_INDEX 0 +#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED 1 +#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN 2 +#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE 3 +#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_ALPHA 4 +#define EG_PS_SAMPLER_BORDER_SIZE 5 #define EG_PS_SAMPLER_BORDER_PM4 128 /* EG_VS_SAMPLER_BORDER */ -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_RED 0 -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_GREEN 1 -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_BLUE 2 -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_ALPHA 3 -#define EG_VS_SAMPLER_BORDER_SIZE 4 +#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_INDEX 0 +#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_RED 1 +#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_GREEN 2 +#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_BLUE 3 +#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_ALPHA 4 +#define EG_VS_SAMPLER_BORDER_SIZE 5 #define EG_VS_SAMPLER_BORDER_PM4 128 /* EG_GS_SAMPLER_BORDER */ -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_RED 0 -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_GREEN 1 -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_BLUE 2 -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_ALPHA 3 -#define EG_GS_SAMPLER_BORDER_SIZE 4 +#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_INDEX 0 +#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_RED 1 +#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_GREEN 2 +#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_BLUE 3 +#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_ALPHA 4 +#define EG_GS_SAMPLER_BORDER_SIZE 5 #define EG_GS_SAMPLER_BORDER_PM4 128 /* EG_CB */ diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index bce2707e77..a123eb62e0 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -90,28 +90,15 @@ enum radeon_family { CHIP_LAST, }; +enum chip_class { + R600, + R700, + EVERGREEN, +}; + enum radeon_family r600_get_family(struct radeon *rw); +enum chip_class r600_get_family_class(struct radeon *radeon); -/* - * radeon object functions - */ -#if 0 -struct radeon_bo { - unsigned refcount; - unsigned handle; - unsigned size; - unsigned alignment; - unsigned map_count; - void *data; -}; -struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, - unsigned size, unsigned alignment, void *ptr); -int radeon_bo_map(struct radeon *radeon, struct radeon_bo *bo); -void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo); -struct radeon_bo *radeon_bo_incref(struct radeon *radeon, struct radeon_bo *bo); -struct radeon_bo *radeon_bo_decref(struct radeon *radeon, struct radeon_bo *bo); -int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo); -#endif /* lowlevel WS bo */ struct radeon_ws_bo; struct radeon_ws_bo *radeon_ws_bo(struct radeon *radeon, @@ -122,7 +109,6 @@ void *radeon_ws_bo_map(struct radeon *radeon, struct radeon_ws_bo *bo, unsigned void radeon_ws_bo_unmap(struct radeon *radeon, struct radeon_ws_bo *bo); void radeon_ws_bo_reference(struct radeon *radeon, struct radeon_ws_bo **dst, struct radeon_ws_bo *src); -int radeon_ws_bo_wait(struct radeon *radeon, struct radeon_ws_bo *bo); /* R600/R700 STATES */ #define R600_GROUP_MAX 16 diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 0d17f75da7..dcb1b4fccc 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -531,7 +531,8 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) /* cf can contains only alu or only vtx or only tex */ if (bc->cf_last == NULL || (bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX && - bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC)) { + bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) || + bc->force_add_cf) { r = r600_bc_add_cf(bc); if (r) { free(nvtx); @@ -543,6 +544,8 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) /* each fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; + if ((bc->ndw / 4) > 7) + bc->force_add_cf = 1; return 0; } @@ -557,7 +560,8 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) /* cf can contains only alu or only vtx or only tex */ if (bc->cf_last == NULL || - bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX) { + bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX || + bc->force_add_cf) { r = r600_bc_add_cf(bc); if (r) { free(ntex); @@ -569,6 +573,8 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) /* each texture fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; + if ((bc->ndw / 4) > 7) + bc->force_add_cf = 1; return 0; } @@ -696,6 +702,7 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) | S_SQ_CF_ALU_WORD1_BARRIER(1) | + S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == 0 ? cf->r6xx_uses_waterfall : 0) | S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); break; case V_SQ_CF_WORD1_SQ_CF_INST_TEX: diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 62a46cb0e1..6aadf72957 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -127,6 +127,7 @@ struct r600_bc_cf { unsigned pop_count; unsigned cf_addr; /* control flow addr */ unsigned kcache0_mode; + unsigned r6xx_uses_waterfall; struct list_head alu; struct list_head tex; struct list_head vtx; diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 54fbc50bbc..0506e8280a 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -283,7 +283,7 @@ static void r600_blit_state_vs_shader(struct r600_screen *rscreen, struct radeon radeon_ws_bo_reference(rscreen->rw, &bo, NULL); return; } - switch (rscreen->chip_class) { + switch (radeon_get_family_class(rscreen->rw)) { case R600: memcpy(data, shader_bc_r600, 128); break; @@ -347,7 +347,7 @@ static void r600_blit_state_ps_shader(struct r600_screen *rscreen, struct radeon radeon_ws_bo_reference(rscreen->rw, &bo, NULL); return; } - switch (rscreen->chip_class) { + switch (radeon_get_family_class(rscreen->rw)) { case R600: memcpy(data, shader_bc_r600, 48); break; diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index dc3fc812e1..ea370782fd 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -171,7 +171,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe, if (transfer->usage & PIPE_TRANSFER_WRITE) { write = 1; } - data = radeon_ws_bo_map(rscreen->rw, rbuffer->r.bo, transfer->usage, rctx); + data = radeon_ws_bo_map(rscreen->rw, rbuffer->r.bo, transfer->usage, pipe); if (!data) return NULL; diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index 776dc24569..7a63d966eb 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -69,6 +69,10 @@ void r600_flush(struct pipe_context *ctx, unsigned flags, { struct r600_context *rctx = r600_context(ctx); struct r600_query *rquery = NULL; +#if 0 + static int dc = 0; + char dname[256]; +#endif /* flush upload buffers */ u_upload_flush(rctx->upload_vb); @@ -77,10 +81,20 @@ void r600_flush(struct pipe_context *ctx, unsigned flags, /* suspend queries */ r600_queries_suspend(ctx); + +#if 0 + sprintf(dname, "gallium-%08d.bof", dc); + if (dc < 2) { + radeon_ctx_dump_bof(rctx->ctx, dname); + R600_ERR("dumped %s\n", dname); + } + dc++; +#endif + radeon_ctx_submit(rctx->ctx); LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) { - rquery->flushed = true; + rquery->flushed = TRUE; } radeon_ctx_clear(rctx->ctx); @@ -88,13 +102,6 @@ void r600_flush(struct pipe_context *ctx, unsigned flags, r600_queries_resume(ctx); } -void r600_flush_ctx(void *data) -{ - struct r600_context *rctx = data; - - rctx->context.flush(&rctx->context, 0, NULL); -} - struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) { struct r600_context *rctx = CALLOC_STRUCT(r600_context); @@ -113,7 +120,7 @@ struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) rctx->screen = rscreen; rctx->rw = rscreen->rw; - if (rscreen->chip_class == EVERGREEN) + if (radeon_get_family_class(rscreen->rw) == EVERGREEN) rctx->vtbl = &eg_hw_state_vtbl; else rctx->vtbl = &r600_hw_state_vtbl; diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c index 5480ca002d..51c9b06549 100644 --- a/src/gallium/drivers/r600/r600_draw.c +++ b/src/gallium/drivers/r600/r600_draw.c @@ -31,16 +31,44 @@ #include <util/u_math.h> #include <util/u_inlines.h> #include <util/u_memory.h> +#include <util/u_index_modify.h> #include "radeon.h" #include "r600_screen.h" #include "r600_context.h" #include "r600_resource.h" #include "r600_state_inlines.h" +static void r600_translate_index_buffer(struct r600_context *r600, + struct pipe_resource **index_buffer, + unsigned *index_size, unsigned index_offset, + unsigned *start, unsigned count) +{ + switch (*index_size) { + case 1: + util_shorten_ubyte_elts(&r600->context, index_buffer, index_offset, *start, count); + *index_size = 2; + *start = 0; + break; + + case 2: + if (*start % 2 != 0 || index_offset) { + util_rebuild_ushort_elts(&r600->context, index_buffer, index_offset, *start, count); + *start = 0; + } + break; + + case 4: + if (index_offset) { + util_rebuild_uint_elts(&r600->context, index_buffer, index_offset, *start, count); + *start = 0; + } + break; + } +} + static int r600_draw_common(struct r600_draw *draw) { struct r600_context *rctx = r600_context(draw->ctx); - struct r600_screen *rscreen = rctx->screen; /* FIXME vs_resource */ struct radeon_state *vs_resource; struct r600_resource *rbuffer; @@ -128,7 +156,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) if (rctx->any_user_vbs) { r600_upload_user_buffers(rctx); - rctx->any_user_vbs = false; + rctx->any_user_vbs = FALSE; } draw.ctx = ctx; @@ -136,14 +164,19 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) draw.start = info->start; draw.count = info->count; if (info->indexed && rctx->index_buffer.buffer) { + draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; draw.min_index = info->min_index; draw.max_index = info->max_index; + + r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer, + &rctx->index_buffer.index_size, + rctx->index_buffer.offset, &draw.start, + info->count); + draw.index_size = rctx->index_buffer.index_size; draw.index_buffer = rctx->index_buffer.buffer; draw.index_buffer_offset = rctx->index_buffer.offset; - assert(rctx->index_buffer.offset % - rctx->index_buffer.index_size == 0); r600_upload_index_buffer(rctx, &draw); } else { diff --git a/src/gallium/drivers/r600/r600_hw_states.c b/src/gallium/drivers/r600/r600_hw_states.c index bca78ee8de..271bd1ac50 100644 --- a/src/gallium/drivers/r600/r600_hw_states.c +++ b/src/gallium/drivers/r600/r600_hw_states.c @@ -197,7 +197,7 @@ static void r600_rasterizer(struct r600_context *rctx, struct radeon_state *rsta float offset_units = 0, offset_scale = 0; char depth = 0; unsigned offset_db_fmt_cntl = 0; - unsigned tmp; + unsigned point_size; unsigned prov_vtx = 1; if (rctx->clip) @@ -232,7 +232,8 @@ static void r600_rasterizer(struct r600_context *rctx, struct radeon_state *rsta rctx->flat_shade = state->flatshade; radeon_state_init(rstate, rscreen->rw, R600_STATE_RASTERIZER, 0, 0); - rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000001; + rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = + S_0286D4_FLAT_SHADE_ENA(1); if (state->sprite_coord_enable) { rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] |= S_0286D4_PNT_SPRITE_ENA(1) | @@ -247,9 +248,18 @@ static void r600_rasterizer(struct r600_context *rctx, struct radeon_state *rsta } rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0; if (clip) { - rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = S_028810_PS_UCP_MODE(3) | ((1 << clip->nr) - 1); - rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] |= S_028810_ZCLIP_NEAR_DISABLE(clip->depth_clamp); - rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] |= S_028810_ZCLIP_FAR_DISABLE(clip->depth_clamp); + /* Clip plane enable bits are stashed in the lower six bits of + * PA_CL_CLIP_CNTL, so just set all of the corresponding bits with a + * pinch of bit twiddling. + * + * PS_UCP_MODE 3 is "expand and clip as trifan," which is the same + * setting that we use on r300-r500. I believe that fglrx always uses + * this mode as well. */ + rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = + ((1 << clip->nr) - 1) | + S_028810_PS_UCP_MODE(3) | + S_028810_ZCLIP_NEAR_DISABLE(clip->depth_clamp) | + S_028810_ZCLIP_FAR_DISABLE(clip->depth_clamp); } rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = S_028814_PROVOKING_VTX_LAST(prov_vtx) | @@ -263,18 +273,24 @@ static void r600_rasterizer(struct r600_context *rctx, struct radeon_state *rsta S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex); rstate->states[R600_RASTERIZER__PA_CL_NANINF_CNTL] = 0x00000000; - /* point size 12.4 fixed point */ - tmp = (unsigned)(state->point_size * 8.0); - rstate->states[R600_RASTERIZER__PA_SU_POINT_SIZE] = S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp); - rstate->states[R600_RASTERIZER__PA_SU_POINT_MINMAX] = 0x80000000; - rstate->states[R600_RASTERIZER__PA_SU_LINE_CNTL] = 0x00000008; + /* Point size for PA_SU_POINT_SIZE and PA_SU_POINT_MINMAX is fixed-point, + * 12.4. + * + * For some reason, maximum point size is set to 0x8000 (2048.0) instead + * of the maximum value 0xFFF0 (4095.0). */ + point_size = (unsigned)(state->point_size * 8.0); + rstate->states[R600_RASTERIZER__PA_SU_POINT_SIZE] = + S_028A00_HEIGHT(point_size) | S_028A00_WIDTH(point_size); + rstate->states[R600_RASTERIZER__PA_SU_POINT_MINMAX] = + S_028A04_MIN_SIZE(0) | S_028A04_MAX_SIZE(0x8000); + rstate->states[R600_RASTERIZER__PA_SU_LINE_CNTL] = S_028A08_WIDTH(8); rstate->states[R600_RASTERIZER__PA_SC_LINE_STIPPLE] = 0x00000005; rstate->states[R600_RASTERIZER__PA_SC_MPASS_PS_CNTL] = 0x00000000; - rstate->states[R600_RASTERIZER__PA_SC_LINE_CNTL] = 0x00000400; - rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ] = 0x3F800000; - rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ] = 0x3F800000; - rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ] = 0x3F800000; - rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ] = 0x3F800000; + rstate->states[R600_RASTERIZER__PA_SC_LINE_CNTL] = S_028C00_LAST_PIXEL(1); + rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ] = fui(1); + rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ] = fui(1); + rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ] = fui(1); + rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ] = fui(1); rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_DB_FMT_CNTL] = offset_db_fmt_cntl; rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_CLAMP] = 0x00000000; rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_SCALE] = fui(offset_scale); @@ -314,7 +330,8 @@ static void r600_scissor(struct r600_context *rctx, struct radeon_state *rstate) rstate->states[R600_SCISSOR__PA_SC_WINDOW_OFFSET] = 0x00000000; rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_TL] = tl; rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_RULE] = 0x0000FFFF; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_RULE] = + S_02820C_CLIP_RULE(0xFFFF); rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_TL] = tl; rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_BR] = br; rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_TL] = tl; @@ -339,15 +356,22 @@ static void r600_viewport(struct r600_context *rctx, struct radeon_state *rstate struct r600_screen *rscreen = rctx->screen; radeon_state_init(rstate, rscreen->rw, R600_STATE_VIEWPORT, 0, 0); - rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMIN_0] = 0x00000000; - rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; + rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMIN_0] = fui(0); + rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = fui(1); rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui(state->scale[0]); rstate->states[R600_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui(state->scale[1]); rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = fui(state->scale[2]); rstate->states[R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui(state->translate[0]); rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui(state->translate[1]); rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = fui(state->translate[2]); - rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; + rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = + S_028818_VPORT_X_SCALE_ENA(1) | + S_028818_VPORT_X_OFFSET_ENA(1) | + S_028818_VPORT_Y_SCALE_ENA(1) | + S_028818_VPORT_Y_OFFSET_ENA(1) | + S_028818_VPORT_Z_SCALE_ENA(1) | + S_028818_VPORT_Z_OFFSET_ENA(1) | + S_028818_VTX_W0_FMT(1); radeon_state_pm4(rstate); } @@ -368,9 +392,8 @@ static void r600_dsa(struct r600_context *rctx, struct radeon_state *rstate) } radeon_state_init(rstate, rscreen->rw, R600_STATE_DSA, 0, 0); - db_shader_control = 0; - db_shader_control |= S_02880C_DUAL_EXPORT_ENABLE(1); - db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); + db_shader_control = S_02880C_DUAL_EXPORT_ENABLE(1) | + S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); rshader = &rctx->ps_shader->shader; if (rshader->uses_kill) @@ -384,35 +407,37 @@ static void r600_dsa(struct r600_context *rctx, struct radeon_state *rstate) db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | S_028800_Z_WRITE_ENABLE(state->depth.writemask) | S_028800_ZFUNC(state->depth.func); - /* set stencil enable */ + /* set stencil enable */ if (state->stencil[0].enabled) { - db_depth_control |= S_028800_STENCIL_ENABLE(1); - db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)); - db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)); - db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)); - db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); + db_depth_control |= S_028800_STENCIL_ENABLE(1) | + S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)) | + S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)) | + S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)) | + S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); stencil_ref_mask = S_028430_STENCILMASK(state->stencil[0].valuemask) | - S_028430_STENCILWRITEMASK(state->stencil[0].writemask); - stencil_ref_mask |= S_028430_STENCILREF(stencil_ref->ref_value[0]); + S_028430_STENCILWRITEMASK(state->stencil[0].writemask) | + S_028430_STENCILREF(stencil_ref->ref_value[0]); + if (state->stencil[1].enabled) { - db_depth_control |= S_028800_BACKFACE_ENABLE(1); - db_depth_control |= S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func)); - db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)); - db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)); - db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); - stencil_ref_mask_bf = S_028434_STENCILMASK_BF(state->stencil[1].valuemask) | - S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask); - stencil_ref_mask_bf |= S_028430_STENCILREF(stencil_ref->ref_value[1]); + db_depth_control |= S_028800_BACKFACE_ENABLE(1) | + S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func)) | + S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)) | + S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)) | + S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); + stencil_ref_mask_bf = + S_028434_STENCILMASK_BF(state->stencil[1].valuemask) | + S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask) | + S_028430_STENCILREF(stencil_ref->ref_value[1]); } } alpha_test_control = 0; alpha_ref = 0; if (state->alpha.enabled) { - alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func); - alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1); + alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func) | + S_028410_ALPHA_TEST_ENABLE(1); alpha_ref = fui(state->alpha.ref_value); } @@ -422,22 +447,22 @@ static void r600_dsa(struct r600_context *rctx, struct radeon_state *rstate) S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); - query_running = false; + query_running = FALSE; LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) { if (rquery->state & R600_QUERY_STATE_STARTED) { - query_running = true; + query_running = TRUE; } } if (query_running) { db_render_override |= S_028D10_NOOP_CULL_DISABLE(1); - if (rscreen->chip_class == R700) + if (radeon_get_family_class(rscreen->rw) == R700) db_render_control |= S_028D0C_R700_PERFECT_ZPASS_COUNTS(1); } rstate->states[R600_DSA__DB_STENCIL_CLEAR] = 0x00000000; - rstate->states[R600_DSA__DB_DEPTH_CLEAR] = 0x3F800000; + rstate->states[R600_DSA__DB_DEPTH_CLEAR] = fui(1); rstate->states[R600_DSA__SX_ALPHA_TEST_CONTROL] = alpha_test_control; rstate->states[R600_DSA__DB_STENCILREFMASK] = stencil_ref_mask; rstate->states[R600_DSA__DB_STENCILREFMASK_BF] = stencil_ref_mask_bf; @@ -515,7 +540,7 @@ static void r600_resource(struct pipe_context *ctx, struct radeon_state *rstate, struct r600_context *rctx = r600_context(ctx); struct r600_screen *rscreen = rctx->screen; const struct util_format_description *desc; - struct r600_resource_texture *tmp; + struct r600_resource_texture *texture; struct r600_resource *rbuffer; unsigned format; uint32_t word4 = 0, yuv_format = 0, pitch = 0; @@ -539,15 +564,15 @@ static void r600_resource(struct pipe_context *ctx, struct radeon_state *rstate, return; } radeon_state_init(rstate, rscreen->rw, R600_STATE_RESOURCE, id, R600_SHADER_PS); - tmp = (struct r600_resource_texture*)view->texture; - rbuffer = &tmp->resource; - if (tmp->depth) { - r = r600_texture_from_depth(ctx, tmp, view->first_level); + texture = (struct r600_resource_texture*)view->texture; + rbuffer = &texture->resource; + if (texture->depth) { + r = r600_texture_from_depth(ctx, texture, view->first_level); if (r) { return; } - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], tmp->uncompressed); - radeon_ws_bo_reference(rscreen->rw, &rstate->bo[1], tmp->uncompressed); + radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], texture->uncompressed); + radeon_ws_bo_reference(rscreen->rw, &rstate->bo[1], texture->uncompressed); } else { radeon_ws_bo_reference(rscreen->rw, &rstate->bo[0], rbuffer->bo); radeon_ws_bo_reference(rscreen->rw, &rstate->bo[1], rbuffer->bo); @@ -558,8 +583,7 @@ static void r600_resource(struct pipe_context *ctx, struct radeon_state *rstate, rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; rstate->placement[3] = RADEON_GEM_DOMAIN_GTT; - pitch = (tmp->pitch[0] / tmp->bpt); - pitch = (pitch + 0x7) & ~0x7; + pitch = align(texture->pitch[0] / texture->bpt, 8); /* FIXME properly handle first level != 0 */ rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = @@ -572,8 +596,8 @@ static void r600_resource(struct pipe_context *ctx, struct radeon_state *rstate, S_038004_TEX_HEIGHT(view->texture->height0 - 1) | S_038004_TEX_DEPTH(view->texture->depth0 - 1) | S_038004_DATA_FORMAT(format); - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = tmp->offset[0] >> 8; - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = tmp->offset[1] >> 8; + rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = texture->offset[0] >> 8; + rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = texture->offset[1] >> 8; rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD4] = word4 | S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) | @@ -594,15 +618,17 @@ static void r600_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate) struct r600_screen *rscreen = rctx->screen; const struct pipe_blend_state *pbs = &rctx->blend->state.blend; int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs; - uint32_t color_control, target_mask, shader_mask; + uint32_t color_control, target_mask, shader_mask, shader_control; int i; target_mask = 0; shader_mask = 0; + shader_control = 0; color_control = S_028808_PER_MRT_BLEND(1); for (i = 0; i < nr_cbufs; i++) { shader_mask |= 0xf << (i * 4); + shader_control |= (1 << i); } if (pbs->logicop_enable) { @@ -630,6 +656,8 @@ static void r600_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate) rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = shader_mask; rstate->states[R600_CB_CNTL__CB_TARGET_MASK] = target_mask; rstate->states[R600_CB_CNTL__CB_COLOR_CONTROL] = color_control; + if (radeon_get_family_class(rscreen->rw) == R700) + rstate->states[R600_CB_CNTL__CB_SHADER_CONTROL] = shader_control; rstate->states[R600_CB_CNTL__PA_SC_AA_CONFIG] = 0x00000000; rstate->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX] = 0x00000000; rstate->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX] = 0x00000000; @@ -831,21 +859,35 @@ static void r600_init_config(struct r600_context *rctx) rctx->config.states[R600_CONFIG__SX_MISC] = 0x00000000; if (family >= CHIP_RV770) { - rctx->config.states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000; + rctx->config.states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = + S_008D8C_VS_PC_LIMIT_ENABLE(1); rctx->config.states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002; rctx->config.states[R600_CONFIG__DB_DEBUG] = 0x00000000; - rctx->config.states[R600_CONFIG__DB_WATERMARKS] = 0x00420204; + rctx->config.states[R600_CONFIG__DB_WATERMARKS] = + S_009838_DEPTH_FREE(4) | + S_009838_DEPTH_FLUSH(16) | + S_009838_DEPTH_PENDING_FREE(4) | + S_009838_DEPTH_CACHELINE_FREE(4); rctx->config.states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000000; - rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000; + rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00500000 | + S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | + S_028A4C_FORCE_EOV_REZ_ENABLE(1); } else { rctx->config.states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00000000; - rctx->config.states[R600_CONFIG__TA_CNTL_AUX] = 0x07000003; + rctx->config.states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002 | + S_009508_DISABLE_CUBE_WRAP(1); rctx->config.states[R600_CONFIG__DB_DEBUG] = 0x82000000; - rctx->config.states[R600_CONFIG__DB_WATERMARKS] = 0x01020204; - rctx->config.states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001; - rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00004010; + rctx->config.states[R600_CONFIG__DB_WATERMARKS] = + S_009838_DEPTH_FREE(4) | + S_009838_DEPTH_FLUSH(16) | + S_009838_DEPTH_PENDING_FREE(4) | + S_009838_DEPTH_CACHELINE_FREE(16); + rctx->config.states[R600_CONFIG__SPI_THREAD_GROUPING] = + S_0286C8_PS_GROUPING(1); + rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] = + S_028A4C_WALK_ORDER_ENABLE(1) | + S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1); } - rctx->config.states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003; rctx->config.states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000; rctx->config.states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000; rctx->config.states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000; @@ -869,7 +911,7 @@ static void r600_init_config(struct r600_context *rctx) rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000; rctx->config.states[R600_CONFIG__VGT_GS_MODE] = 0x00000000; rctx->config.states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001; + rctx->config.states[R600_CONFIG__VGT_REUSE_OFF] = S_028AB4_REUSE_OFF(1); rctx->config.states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000; rctx->config.states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000; radeon_state_pm4(&rctx->config); @@ -943,21 +985,24 @@ static int r600_ps_shader(struct r600_context *rctx, struct r600_context_state * const struct pipe_rasterizer_state *rasterizer; struct r600_shader *rshader = &rpshader->shader; unsigned i, tmp, exports_ps, num_cout; - boolean have_pos = FALSE; + boolean have_pos = FALSE, have_face = FALSE; rasterizer = &rctx->rasterizer->state.rasterizer; radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_PS); for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(i); - tmp |= S_028644_SEL_CENTROID(1); + tmp = S_028644_SEMANTIC(i) | S_028644_SEL_CENTROID(1); if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) have_pos = TRUE; if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || - rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || - rshader->input[i].name == TGSI_SEMANTIC_POSITION) { + rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || + rshader->input[i].name == TGSI_SEMANTIC_POSITION) { tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); } + + if (rshader->input[i].name == TGSI_SEMANTIC_FACE) + have_face = TRUE; + if (rasterizer->sprite_coord_enable & (1 << i)) { tmp |= S_028644_PT_SPRITE_TEX(1); } @@ -968,25 +1013,33 @@ static int r600_ps_shader(struct r600_context *rctx, struct r600_context_state * num_cout = 0; for (i = 0; i < rshader->noutput; i++) { if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) - exports_ps |= 1; + exports_ps |= S_028854_EXPORT_Z(1); else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { - exports_ps |= (1 << (num_cout+1)); num_cout++; } } - if (!exports_ps) { - /* always at least export 1 component per pixel */ - exports_ps = 2; + exports_ps |= S_028854_EXPORT_COLORS(num_cout); + if (exports_ps == 0) { + /* Always at least export 1 color component per pixel. */ + exports_ps = S_028854_EXPORT_COLORS(1); } - state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) | - S_0286CC_PERSP_GRADIENT_ENA(1); + state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = + S_0286CC_NUM_INTERP(rshader->ninput) | + S_0286CC_PERSP_GRADIENT_ENA(1); + if (have_pos) { - state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] |= S_0286CC_POSITION_ENA(1) | - S_0286CC_BARYC_SAMPLE_CNTL(1); - state->states[R600_PS_SHADER__SPI_INPUT_Z] |= 1; + state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] |= + S_0286CC_POSITION_ENA(1) | + S_0286CC_BARYC_SAMPLE_CNTL(1); + state->states[R600_PS_SHADER__SPI_INPUT_Z] |= + S_0286D8_PROVIDE_Z_TO_SPI(1); } - state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; - state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr) | + + state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = + S_0286D0_FRONT_FACE_ENA(have_face); + + state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = + S_028868_NUM_GPRS(rshader->bc.ngpr) | S_028868_STACK_SIZE(rshader->bc.nstack); state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps; radeon_ws_bo_reference(rscreen->rw, &state->bo[0], rpshader->bo); @@ -1011,8 +1064,10 @@ static int r600_vs_shader(struct r600_context *rctx, struct r600_context_state * tmp = i << ((i & 3) * 8); state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp; } - state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); - state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr) | + state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = + S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); + state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = + S_028868_NUM_GPRS(rshader->bc.ngpr) | S_028868_STACK_SIZE(rshader->bc.nstack); radeon_ws_bo_reference(rscreen->rw, &state->bo[0], rpshader->bo); radeon_ws_bo_reference(rscreen->rw, &state->bo[1], rpshader->bo); @@ -1151,20 +1206,31 @@ static void r600_texture_state_db(struct r600_screen *rscreen, struct r600_resou static void r600_texture_state_viewport(struct r600_screen *rscreen, struct r600_resource_texture *rtexture, unsigned level) { struct radeon_state *rstate = &rtexture->viewport[level]; + float width, height; radeon_state_init(rstate, rscreen->rw, R600_STATE_VIEWPORT, 0, 0); + width = rtexture->width[level] * 0.5; + height = rtexture->height[level] * 0.5; + /* set states (most default value are 0 and struct already * initialized to 0, thus avoid resetting them) */ - rstate->states[R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui((float)rtexture->width[level]/2.0); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui((float)rtexture->width[level]/2.0); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui((float)rtexture->height[level]/2.0); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui((float)-rtexture->height[level]/2.0); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = 0x3F000000; - rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = 0x3F000000; - rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; - rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; + rstate->states[R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui(width); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui(width); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui(height); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui(height); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = fui(0.5); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = fui(0.5); + rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = + S_028818_VPORT_X_SCALE_ENA(1) | + S_028818_VPORT_X_OFFSET_ENA(1) | + S_028818_VPORT_Y_SCALE_ENA(1) | + S_028818_VPORT_Y_OFFSET_ENA(1) | + S_028818_VPORT_Z_SCALE_ENA(1) | + S_028818_VPORT_Z_OFFSET_ENA(1) | + S_028818_VTX_W0_FMT(1); + rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = fui(1); radeon_state_pm4(rstate); } @@ -1272,7 +1338,7 @@ void r600_set_constant_buffer_mem(struct pipe_context *ctx, nconstant = buffer->width0 / 16; size = ALIGN_DIVUP(nconstant, 16); - + radeon_state_init(rstate, rscreen->rw, type, 0, shader_class); rstate->states[R600_VS_CBUF__ALU_CONST_BUFFER_SIZE_VS_0] = size; rstate->states[R600_VS_CBUF__ALU_CONST_CACHE_VS_0] = 0; diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index 12900cce11..6e50701de6 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -108,8 +108,7 @@ static void r600_query_result(struct pipe_context *ctx, struct r600_query *rquer u32 *results; int i; - radeon_ws_bo_wait(rscreen->rw, rquery->buffer); - results = radeon_ws_bo_map(rscreen->rw, rquery->buffer, 0, r600_context(ctx)); + results = radeon_ws_bo_map(rscreen->rw, rquery->buffer, 0, ctx); for (i = 0; i < rquery->num_results; i += 4) { start = (u64)results[i] | (u64)results[i + 1] << 32; end = (u64)results[i + 2] | (u64)results[i + 3] << 32; @@ -133,7 +132,7 @@ static void r600_query_resume(struct pipe_context *ctx, struct r600_query *rquer r600_query_result(ctx, rquery); } r600_query_begin(rctx, rquery); - rquery->flushed = false; + rquery->flushed = FALSE; } static void r600_query_suspend(struct pipe_context *ctx, struct r600_query *rquery) @@ -152,7 +151,7 @@ static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query) rquery->state = R600_QUERY_STATE_STARTED; rquery->num_results = 0; - rquery->flushed = false; + rquery->flushed = FALSE; r600_query_resume(ctx, rquery); r = radeon_ctx_set_query_state(rctx->ctx, &rquery->rstate); if (r == -EBUSY) { @@ -232,7 +231,7 @@ static boolean r600_get_query_result(struct pipe_context *ctx, if (!rquery->flushed) { ctx->flush(ctx, 0, NULL); - rquery->flushed = true; + rquery->flushed = TRUE; } r600_query_result(ctx, rquery); *result = rquery->result; diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 6ddb1ad32a..cd1c31e82d 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -103,7 +103,7 @@ static INLINE struct r600_resource_buffer *r600_buffer(struct pipe_resource *buf static INLINE boolean r600_buffer_is_user_buffer(struct pipe_resource *buffer) { - return r600_buffer(buffer)->user_buffer ? true : false; + return r600_buffer(buffer)->user_buffer ? TRUE : FALSE; } #endif diff --git a/src/gallium/drivers/r600/r600_screen.c b/src/gallium/drivers/r600/r600_screen.c index 1711fabfc7..d280a45bab 100644 --- a/src/gallium/drivers/r600/r600_screen.c +++ b/src/gallium/drivers/r600/r600_screen.c @@ -242,39 +242,13 @@ struct pipe_screen *r600_screen_create(struct radeon *rw) if (rscreen == NULL) { return NULL; } - + /* don't enable mem constant for r600 yet */ rscreen->use_mem_constant = FALSE; - - switch (family) { - case CHIP_R600: - case CHIP_RV610: - case CHIP_RV630: - case CHIP_RV670: - case CHIP_RV620: - case CHIP_RV635: - case CHIP_RS780: - case CHIP_RS880: - rscreen->chip_class = R600; - break; - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: - rscreen->chip_class = R700; - break; - case CHIP_CEDAR: - case CHIP_REDWOOD: - case CHIP_JUNIPER: - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - rscreen->chip_class = EVERGREEN; + if (radeon_get_family_class(rw) == EVERGREEN) { rscreen->use_mem_constant = TRUE; - break; - default: - FREE(rscreen); - return NULL; } + radeon_set_mem_constant(rw, rscreen->use_mem_constant); rscreen->rw = rw; rscreen->screen.winsys = (struct pipe_winsys*)rw; diff --git a/src/gallium/drivers/r600/r600_screen.h b/src/gallium/drivers/r600/r600_screen.h index 4be77865fb..502444f03a 100644 --- a/src/gallium/drivers/r600/r600_screen.h +++ b/src/gallium/drivers/r600/r600_screen.h @@ -42,17 +42,10 @@ struct r600_transfer { struct pipe_resource *linear_texture; }; -enum chip_class { - R600, - R700, - EVERGREEN, -}; - struct r600_screen { struct pipe_screen screen; struct radeon *rw; - enum chip_class chip_class; - boolean use_mem_constant; + boolean use_mem_constant; }; static INLINE struct r600_screen *r600_screen(struct pipe_screen *screen) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 4da6850b0a..e18c6ce605 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -167,7 +167,7 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state if (rpshader->bo == NULL) { return -ENOMEM; } - data = radeon_ws_bo_map(rscreen->rw, rpshader->bo, 0, rctx); + data = radeon_ws_bo_map(rscreen->rw, rpshader->bo, 0, ctx); memcpy(data, rshader->bc.bytecode, rshader->bc.ndw * 4); radeon_ws_bo_unmap(rscreen->rw, rpshader->bo); /* build state */ @@ -514,7 +514,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[i].barrier = 1; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; output[i].array_base = 0; - output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT; + output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); noutput++; } } @@ -926,38 +926,95 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) struct r600_bc_alu alu; int r; - r = tgsi_setup_trig(ctx, r600_src); - if (r) - return r; - + /* We'll only need the trig stuff if we are going to write to the + * X or Y components of the destination vector. + */ + if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { + r = tgsi_setup_trig(ctx, r600_src); + if (r) + return r; + } /* dst.x = COS */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); - r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); - if (r) - return r; + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); + r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); + if (r) + return r; - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 0; - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } /* dst.y = SIN */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); - r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); - if (r) - return r; + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); + r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); + if (r) + return r; + + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + /* dst.z = 0.0; */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + + r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); + if (r) + return r; + + alu.src[0].sel = V_SQ_ALU_SRC_0; + alu.src[0].chan = 0; + + alu.last = 1; + + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; + } + + /* dst.w = 1.0; */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + + r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); + if (r) + return r; + + alu.src[0].sel = V_SQ_ALU_SRC_1; + alu.src[0].chan = 0; + + alu.last = 1; + + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; + } - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 0; - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; return 0; } @@ -1530,7 +1587,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - src_not_temp = false; + src_not_temp = FALSE; src_gpr = ctx->temp_reg; } @@ -1641,7 +1698,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) r = r600_bc_add_literal(ctx->bc, lit_vals); if (r) return r; - src_not_temp = false; + src_not_temp = FALSE; src_gpr = ctx->temp_reg; } @@ -2313,6 +2370,7 @@ static int tgsi_arl(struct r600_shader_ctx *ctx) r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); if (r) return r; + ctx->bc->cf_last->r6xx_uses_waterfall = 1; return 0; } diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 4dcdc492fc..424f7a8913 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -33,6 +33,10 @@ #include "r600_context.h" #include "r600_resource.h" +static void clean_flush(struct r600_context *rctx, struct radeon_state *flush); +static int setup_cb_flush(struct r600_context *rctx, struct radeon_state *flush); +static int setup_db_flush(struct r600_context *rctx, struct radeon_state *flush); + static struct r600_context_state *r600_new_context_state(unsigned type) { struct r600_context_state *rstate = CALLOC_STRUCT(r600_context_state); @@ -378,6 +382,14 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, struct r600_context_state *rstate; int i; + if (rctx->framebuffer) { + for (i = 0; i < rctx->framebuffer->state.framebuffer.nr_cbufs; i++) + radeon_draw_unbind(&rctx->draw, &rctx->framebuffer->rstate[i+1]); + radeon_draw_unbind(&rctx->draw, &rctx->framebuffer->rstate[0]); + } + clean_flush(rctx, &rctx->hw_states.cb_flush); + clean_flush(rctx, &rctx->hw_states.db_flush); + r600_context_state_decref(rctx->framebuffer); rstate = r600_new_context_state(pipe_framebuffer_type); @@ -393,6 +405,10 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, if (state->zsbuf) { rctx->vtbl->db(rctx, &rstate->rstate[0], state); } + /* setup flush states */ + setup_cb_flush(rctx, &rctx->hw_states.cb_flush); + setup_db_flush(rctx, &rctx->hw_states.db_flush); + return; } @@ -511,7 +527,7 @@ void r600_init_state_functions(struct r600_context *rctx) rctx->context.set_blend_color = r600_set_blend_color; rctx->context.set_clip_state = r600_set_clip_state; - if (rctx->screen->chip_class == EVERGREEN) + if (radeon_get_family_class(rctx->rw) == EVERGREEN) rctx->context.set_constant_buffer = eg_set_constant_buffer; else if (rctx->screen->use_mem_constant) rctx->context.set_constant_buffer = r600_set_constant_buffer_mem; @@ -554,6 +570,7 @@ struct r600_context_state *r600_context_state_decref(struct r600_context_state * case pipe_framebuffer_type: for (i = 0; i < rstate->state.framebuffer.nr_cbufs; i++) { pipe_surface_reference(&rstate->state.framebuffer.cbufs[i], NULL); + radeon_state_fini(&rstate->rstate[i+1]); } pipe_surface_reference(&rstate->state.framebuffer.zsbuf, NULL); break; @@ -600,6 +617,17 @@ static void r600_bind_shader_sampler(struct r600_context *rctx, struct r600_shad } } +static void clean_flush(struct r600_context *rctx, struct radeon_state *flush) +{ + struct r600_screen *rscreen = rctx->screen; + int i; + + for (i = 0 ; i < flush->nbo; i++) { + radeon_ws_bo_reference(rscreen->rw, &flush->bo[i], NULL); + } + flush->nbo = 0; + radeon_state_fini(flush); +} static int setup_cb_flush(struct r600_context *rctx, struct radeon_state *flush) { @@ -658,10 +686,6 @@ int r600_context_hw_states(struct pipe_context *ctx) rctx->vtbl->dsa(rctx, &rctx->hw_states.dsa); rctx->vtbl->cb_cntl(rctx, &rctx->hw_states.cb_cntl); - /* setup flushes */ - setup_db_flush(rctx, &rctx->hw_states.db_flush); - setup_cb_flush(rctx, &rctx->hw_states.cb_flush); - /* bind states */ radeon_draw_bind(&rctx->draw, &rctx->config); @@ -673,9 +697,6 @@ int r600_context_hw_states(struct pipe_context *ctx) radeon_draw_bind(&rctx->draw, &rctx->hw_states.db_flush); radeon_draw_bind(&rctx->draw, &rctx->hw_states.cb_flush); - radeon_draw_bind(&rctx->draw, &rctx->hw_states.db_flush); - radeon_draw_bind(&rctx->draw, &rctx->hw_states.cb_flush); - if (rctx->viewport) { radeon_draw_bind(&rctx->draw, &rctx->viewport->rstate[0]); } diff --git a/src/gallium/drivers/r600/r600_state2.c b/src/gallium/drivers/r600/r600_state2.c index 63cc19708b..5269e6db91 100644 --- a/src/gallium/drivers/r600/r600_state2.c +++ b/src/gallium/drivers/r600/r600_state2.c @@ -40,7 +40,6 @@ #include <util/u_memory.h> #include <util/u_inlines.h> #include <pipebuffer/pb_buffer.h> -#include "state_tracker/drm_driver.h" #include "r600.h" #include "r600d.h" #include "r700_sq.h" @@ -57,12 +56,6 @@ uint32_t r600_translate_texformat(enum pipe_format format, #include "r600_state_inlines.h" -enum chip_class { - R600, - R700, - EVERGREEN, -}; - enum r600_pipe_state_id { R600_PIPE_STATE_BLEND = 0, R600_PIPE_STATE_BLEND_COLOR, @@ -86,7 +79,6 @@ enum r600_pipe_state_id { struct r600_screen { struct pipe_screen screen; struct radeon *radeon; - unsigned chip_class; }; struct r600_pipe_sampler_view { @@ -206,7 +198,7 @@ static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shade r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_028894_SQ_PGM_START_FS, 0x00000000, 0xFFFFFFFF, shader->bo); - rctx->vs_rebuild = false; + rctx->vs_rebuild = FALSE; } static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) @@ -276,7 +268,7 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade r600_pipe_state_add_reg(rstate, R600_GROUP_CONTEXT, R_0288CC_SQ_PGM_CF_OFFSET_PS, 0x00000000, 0xFFFFFFFF, NULL); - rctx->ps_rebuild = false; + rctx->ps_rebuild = FALSE; } static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) @@ -685,8 +677,10 @@ static void r600_flush2(struct pipe_context *ctx, unsigned flags, struct pipe_fence_handle **fence) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; +#if 0 static int dc = 0; char dname[256]; +#endif if (!rctx->ctx.pm4_cdwords) return; @@ -1081,10 +1075,10 @@ static void r600_bind_rs_state(struct pipe_context *ctx, void *state) return; if (rctx->flatshade != rs->flatshade) { - rctx->ps_rebuild = true; + rctx->ps_rebuild = TRUE; } if (rctx->sprite_coord_enable != rs->sprite_coord_enable) { - rctx->ps_rebuild = true; + rctx->ps_rebuild = TRUE; } rctx->flatshade = rs->flatshade; rctx->sprite_coord_enable = rs->sprite_coord_enable; @@ -1217,8 +1211,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c bo[1] = radeon_ws_bo_incref(rscreen->rw, tmp->uncompressed); #endif } - pitch = (tmp->pitch[0] / tmp->bpt); - pitch = (pitch + 0x7) & ~0x7; + pitch = align(tmp->pitch[0] / tmp->bpt, 8); /* FIXME properly handle first level != 0 */ r600_pipe_state_add_reg(rstate, R600_GROUP_RESOURCE, R_038000_RESOURCE0_WORD0, @@ -1371,7 +1364,7 @@ static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) rctx->vertex_elements = v; if (v) { v->refcount++; - rctx->vs_rebuild = true; + rctx->vs_rebuild = TRUE; } } @@ -1607,6 +1600,9 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, /* unreference old buffer and reference new one */ rstate->id = R600_PIPE_STATE_FRAMEBUFFER; for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { + pipe_surface_reference(&rctx->framebuffer.cbufs[i], NULL); + } + for (int i = 0; i < state->nr_cbufs; i++) { pipe_surface_reference(&rctx->framebuffer.cbufs[i], state->cbufs[i]); } pipe_surface_reference(&rctx->framebuffer.zsbuf, state->zsbuf); @@ -2189,27 +2185,6 @@ struct pipe_screen *r600_screen_create2(struct radeon *radeon) return NULL; } - switch (family) { - case CHIP_R600: - case CHIP_RV610: - case CHIP_RV630: - case CHIP_RV670: - case CHIP_RV620: - case CHIP_RV635: - case CHIP_RS780: - case CHIP_RS880: - rscreen->chip_class = R600; - break; - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: - rscreen->chip_class = R700; - break; - default: - FREE(rscreen); - return NULL; - } rscreen->radeon = radeon; rscreen->screen.winsys = (struct pipe_winsys*)radeon; rscreen->screen.destroy = r600_destroy_screen; diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index b4c21d9e12..283f1e59b3 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -283,6 +283,10 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_B4G4R4A4_UNORM: case PIPE_FORMAT_B4G4R4X4_UNORM: return V_0280A0_SWAP_ALT; + + case PIPE_FORMAT_Z16_UNORM: + return V_0280A0_SWAP_STD; + /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: @@ -310,6 +314,10 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return V_0280A0_SWAP_STD; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return V_0280A0_SWAP_STD; + case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_R10G10B10X2_SNORM: case PIPE_FORMAT_B10G10R10A2_UNORM: @@ -357,6 +365,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_B4G4R4X4_UNORM: return V_0280A0_COLOR_4_4_4_4; + case PIPE_FORMAT_Z16_UNORM: + return V_0280A0_COLOR_16; + /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: case PIPE_FORMAT_A8B8G8R8_UNORM: @@ -383,6 +394,10 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return V_0280A0_COLOR_8_24; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return V_0280A0_COLOR_24_8; + case PIPE_FORMAT_R32_FLOAT: return V_0280A0_COLOR_32_FLOAT; diff --git a/src/gallium/drivers/r600/r600_states_inc.h b/src/gallium/drivers/r600/r600_states_inc.h index de717f3536..1c8075ebdb 100644 --- a/src/gallium/drivers/r600/r600_states_inc.h +++ b/src/gallium/drivers/r600/r600_states_inc.h @@ -15,35 +15,34 @@ #define R600_CONFIG__DB_WATERMARKS 10 #define R600_CONFIG__SX_MISC 11 #define R600_CONFIG__SPI_THREAD_GROUPING 12 -#define R600_CONFIG__CB_SHADER_CONTROL 13 -#define R600_CONFIG__SQ_ESGS_RING_ITEMSIZE 14 -#define R600_CONFIG__SQ_GSVS_RING_ITEMSIZE 15 -#define R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE 16 -#define R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE 17 -#define R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE 18 -#define R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE 19 -#define R600_CONFIG__SQ_FBUF_RING_ITEMSIZE 20 -#define R600_CONFIG__SQ_REDUC_RING_ITEMSIZE 21 -#define R600_CONFIG__SQ_GS_VERT_ITEMSIZE 22 -#define R600_CONFIG__VGT_OUTPUT_PATH_CNTL 23 -#define R600_CONFIG__VGT_HOS_CNTL 24 -#define R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL 25 -#define R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL 26 -#define R600_CONFIG__VGT_HOS_REUSE_DEPTH 27 -#define R600_CONFIG__VGT_GROUP_PRIM_TYPE 28 -#define R600_CONFIG__VGT_GROUP_FIRST_DECR 29 -#define R600_CONFIG__VGT_GROUP_DECR 30 -#define R600_CONFIG__VGT_GROUP_VECT_0_CNTL 31 -#define R600_CONFIG__VGT_GROUP_VECT_1_CNTL 32 -#define R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL 33 -#define R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL 34 -#define R600_CONFIG__VGT_GS_MODE 35 -#define R600_CONFIG__PA_SC_MODE_CNTL 36 -#define R600_CONFIG__VGT_STRMOUT_EN 37 -#define R600_CONFIG__VGT_REUSE_OFF 38 -#define R600_CONFIG__VGT_VTX_CNT_EN 39 -#define R600_CONFIG__VGT_STRMOUT_BUFFER_EN 40 -#define R600_CONFIG_SIZE 41 +#define R600_CONFIG__SQ_ESGS_RING_ITEMSIZE 13 +#define R600_CONFIG__SQ_GSVS_RING_ITEMSIZE 14 +#define R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE 15 +#define R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE 16 +#define R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE 17 +#define R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE 18 +#define R600_CONFIG__SQ_FBUF_RING_ITEMSIZE 19 +#define R600_CONFIG__SQ_REDUC_RING_ITEMSIZE 20 +#define R600_CONFIG__SQ_GS_VERT_ITEMSIZE 21 +#define R600_CONFIG__VGT_OUTPUT_PATH_CNTL 22 +#define R600_CONFIG__VGT_HOS_CNTL 23 +#define R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL 24 +#define R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL 25 +#define R600_CONFIG__VGT_HOS_REUSE_DEPTH 26 +#define R600_CONFIG__VGT_GROUP_PRIM_TYPE 27 +#define R600_CONFIG__VGT_GROUP_FIRST_DECR 28 +#define R600_CONFIG__VGT_GROUP_DECR 29 +#define R600_CONFIG__VGT_GROUP_VECT_0_CNTL 30 +#define R600_CONFIG__VGT_GROUP_VECT_1_CNTL 31 +#define R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL 32 +#define R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL 33 +#define R600_CONFIG__VGT_GS_MODE 34 +#define R600_CONFIG__PA_SC_MODE_CNTL 35 +#define R600_CONFIG__VGT_STRMOUT_EN 36 +#define R600_CONFIG__VGT_REUSE_OFF 37 +#define R600_CONFIG__VGT_VTX_CNT_EN 38 +#define R600_CONFIG__VGT_STRMOUT_BUFFER_EN 39 +#define R600_CONFIG_SIZE 40 #define R600_CONFIG_PM4 128 /* R600_CB_CNTL */ @@ -65,7 +64,8 @@ #define R600_CB_CNTL__CB_CLRCMP_DST 15 #define R600_CB_CNTL__CB_CLRCMP_MSK 16 #define R600_CB_CNTL__PA_SC_AA_MASK 17 -#define R600_CB_CNTL_SIZE 18 +#define R600_CB_CNTL__CB_SHADER_CONTROL 18 +#define R600_CB_CNTL_SIZE 19 #define R600_CB_CNTL_PM4 128 /* R600_RASTERIZER */ diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 274679d127..f60fe9f316 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -73,7 +73,7 @@ static unsigned long r600_texture_get_offset(struct r600_resource_texture *rtex, } } -static void r600_setup_miptree(struct r600_resource_texture *rtex) +static void r600_setup_miptree(struct r600_resource_texture *rtex, enum chip_class chipc) { struct pipe_resource *ptex = &rtex->resource.base.b; unsigned long w, h, pitch, size, layer_size, i, offset; @@ -84,10 +84,17 @@ static void r600_setup_miptree(struct r600_resource_texture *rtex) h = u_minify(ptex->height0, i); h = util_next_power_of_two(h); pitch = util_format_get_stride(ptex->format, align(w, 64)); - pitch = align(pitch, 256); + if (chipc == EVERGREEN) + pitch = align(pitch, 512); + else + pitch = align(pitch, 256); layer_size = pitch * h; - if (ptex->target == PIPE_TEXTURE_CUBE) - size = layer_size * 6; + if (ptex->target == PIPE_TEXTURE_CUBE) { + if (chipc >= R700) + size = layer_size * 8; + else + size = layer_size * 6; + } else size = layer_size * u_minify(ptex->depth0, i); rtex->offset[i] = offset; @@ -116,7 +123,7 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, resource->base.vtbl = &r600_texture_vtbl; pipe_reference_init(&resource->base.b.reference, 1); resource->base.b.screen = screen; - r600_setup_miptree(rtex); + r600_setup_miptree(rtex, radeon_get_family_class(radeon)); /* FIXME alignment 4096 enought ? too much ? */ resource->domain = r600_domain_from_usage(resource->base.b.bind); @@ -315,7 +322,6 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx, void* r600_texture_transfer_map(struct pipe_context *ctx, struct pipe_transfer* transfer) { - struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; struct radeon_ws_bo *bo; enum pipe_format format = transfer->resource->format; @@ -325,12 +331,11 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, char *map; int r; - ctx->flush(ctx, 0, NULL); if (rtransfer->linear_texture) { bo = ((struct r600_resource *)rtransfer->linear_texture)->bo; } else { rtex = (struct r600_resource_texture*)transfer->resource; - if (rtex->depth && rscreen->chip_class != EVERGREEN) { + if (rtex->depth && radeon_get_family_class(radeon) != EVERGREEN) { r = r600_texture_from_depth(ctx, rtex, transfer->sr.level); if (r) { return NULL; @@ -344,11 +349,10 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, transfer->box.y / util_format_get_blockheight(format) * transfer->stride + transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); } - map = radeon_ws_bo_map(radeon, bo, 0, r600_context(ctx)); + map = radeon_ws_bo_map(radeon, bo, 0, ctx); if (!map) { return NULL; } - radeon_ws_bo_wait(radeon, bo); return map + offset; } @@ -469,11 +473,13 @@ uint32_t r600_translate_texformat(enum pipe_format format, result = V_0280A0_COLOR_16; goto out_word4; case PIPE_FORMAT_Z24X8_UNORM: - result = V_0280A0_COLOR_8_24; - goto out_word4; case PIPE_FORMAT_Z24_UNORM_S8_USCALED: result = V_0280A0_COLOR_8_24; goto out_word4; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + result = V_0280A0_COLOR_24_8; + goto out_word4; default: goto out_unknown; } diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 07bfc0593e..f1aa49c0f7 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -2112,6 +2112,9 @@ #define R_0286D4_SPI_INTERP_CONTROL_0 0x0286D4 #define R_028A48_PA_SC_MPASS_PS_CNTL 0x028A48 #define R_028C00_PA_SC_LINE_CNTL 0x028C00 +#define S_028C00_LAST_PIXEL(x) (((x) & 0x1) << 10) +#define G_028C00_LAST_PIXEL(x) (((x) >> 10) & 0x1) +#define C_028C00_LAST_PIXEL 0xFFFFFBFF #define R_028C04_PA_SC_AA_CONFIG 0x028C04 #define R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX 0x028C1C #define R_028C48_PA_SC_AA_MASK 0x028C48 @@ -2125,7 +2128,16 @@ #define R_028814_PA_SU_SC_MODE_CNTL 0x028814 #define R_028A00_PA_SU_POINT_SIZE 0x028A00 #define R_028A04_PA_SU_POINT_MINMAX 0x028A04 +#define S_028A04_MIN_SIZE(x) (((x) & 0xFFFF) << 0) +#define G_028A04_MIN_SIZE(x) (((x) >> 0) & 0xFFFF) +#define C_028A04_MIN_SIZE 0xFFFF0000 +#define S_028A04_MAX_SIZE(x) (((x) & 0xFFFF) << 16) +#define G_028A04_MAX_SIZE(x) (((x) >> 16) & 0xFFFF) +#define C_028A04_MAX_SIZE 0x0000FFFF #define R_028A08_PA_SU_LINE_CNTL 0x028A08 +#define S_028A08_WIDTH(x) (((x) & 0xFFFF) << 0) +#define G_028A08_WIDTH(x) (((x) >> 0) & 0xFFFF) +#define C_028A08_WIDTH 0xFFFF0000 #define R_028A0C_PA_SC_LINE_STIPPLE 0x028A0C #define R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL 0x028DF8 #define R_028DFC_PA_SU_POLY_OFFSET_CLAMP 0x028DFC @@ -2134,6 +2146,27 @@ #define R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE 0x028E08 #define R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET 0x028E0C #define R_028818_PA_CL_VTE_CNTL 0x028818 +#define S_028818_VPORT_X_SCALE_ENA(x) (((x) & 0x1) << 0) +#define G_028818_VPORT_X_SCALE_ENA(x) (((x) >> 0 & 0x1) +#define C_028818_VPORT_X_SCALE_ENA 0xFFFFFFFE +#define S_028818_VPORT_X_OFFSET_ENA(x) (((x) & 0x1) << 1) +#define G_028818_VPORT_X_OFFSET_ENA(x) (((x) >> 1 & 0x1) +#define C_028818_VPORT_X_OFFSET_ENA 0xFFFFFFFD +#define S_028818_VPORT_Y_SCALE_ENA(x) (((x) & 0x1) << 2) +#define G_028818_VPORT_Y_SCALE_ENA(x) (((x) >> 2 & 0x1) +#define C_028818_VPORT_Y_SCALE_ENA 0xFFFFFFFB +#define S_028818_VPORT_Y_OFFSET_ENA(x) (((x) & 0x1) << 3) +#define G_028818_VPORT_Y_OFFSET_ENA(x) (((x) >> 3 & 0x1) +#define C_028818_VPORT_Y_OFFSET_ENA 0xFFFFFFF7 +#define S_028818_VPORT_Z_SCALE_ENA(x) (((x) & 0x1) << 4) +#define G_028818_VPORT_Z_SCALE_ENA(x) (((x) >> 4 & 0x1) +#define C_028818_VPORT_Z_SCALE_ENA 0xFFFFFFEF +#define S_028818_VPORT_Z_OFFSET_ENA(x) (((x) & 0x1) << 5) +#define G_028818_VPORT_Z_OFFSET_ENA(x) (((x) >> 5 & 0x1) +#define C_028818_VPORT_Z_OFFSET_ENA 0xFFFFFFDF +#define S_028818_VTX_W0_FMT(x) (((x) & 0x1) << 10) +#define G_028818_VTX_W0_FMT(x) (((x) >> 10) & 0x1) +#define C_028818_VTX_W0_FMT 0xFFFFFBFF #define R_02843C_PA_CL_VPORT_XSCALE_0 0x02843C #define R_028444_PA_CL_VPORT_YSCALE_0 0x028444 #define R_02844C_PA_CL_VPORT_ZSCALE_0 0x02844C @@ -2199,6 +2232,12 @@ #define R_0286C0_SPI_PS_INPUT_CNTL_31 0x0286C0 #define R_028850_SQ_PGM_RESOURCES_PS 0x028850 #define R_028854_SQ_PGM_EXPORTS_PS 0x028854 +#define S_028854_EXPORT_COLORS(x) (((x) & 0xF) << 1) +#define G_028854_EXPORT_COLORS(x) (((x) >> 1) & 0xF) +#define C_028854_EXPORT_COLORS 0xFFFFFFE1 +#define S_028854_EXPORT_Z(x) (((x) & 0x1) << 0) +#define G_028854_EXPORT_Z(x) (((x) >> 0) & 0x1) +#define C_028854_EXPORT_Z 0xFFFFFFFE #define R_008958_VGT_PRIMITIVE_TYPE 0x008958 #define R_028A7C_VGT_DMA_INDEX_TYPE 0x028A7C #define R_028A88_VGT_DMA_NUM_INSTANCES 0x028A88 diff --git a/src/gallium/drivers/r600/radeon.h b/src/gallium/drivers/r600/radeon.h index 5f9f21db1b..a7e7982c19 100644 --- a/src/gallium/drivers/r600/radeon.h +++ b/src/gallium/drivers/r600/radeon.h @@ -79,6 +79,12 @@ enum radeon_family { CHIP_LAST, }; +enum chip_class { + R600, + R700, + EVERGREEN, +}; + enum { R600_SHADER_PS = 1, R600_SHADER_VS, @@ -88,6 +94,7 @@ enum { }; enum radeon_family radeon_get_family(struct radeon *rw); +enum chip_class radeon_get_family_class(struct radeon *radeon); void radeon_set_mem_constant(struct radeon *radeon, boolean state); /* lowlevel WS bo */ @@ -100,9 +107,11 @@ void *radeon_ws_bo_map(struct radeon *radeon, struct radeon_ws_bo *bo, unsigned void radeon_ws_bo_unmap(struct radeon *radeon, struct radeon_ws_bo *bo); void radeon_ws_bo_reference(struct radeon *radeon, struct radeon_ws_bo **dst, struct radeon_ws_bo *src); -int radeon_ws_bo_wait(struct radeon *radeon, struct radeon_ws_bo *bo); struct radeon_stype_info; + +/* currently limited to max buffers in a cb flush */ +#define RADEON_STATE_MAX_BO 8 /* * states functions */ @@ -120,7 +129,7 @@ struct radeon_state { u32 pm4_crc; u32 pm4[128]; unsigned nbo; - struct radeon_ws_bo *bo[4]; + struct radeon_ws_bo *bo[RADEON_STATE_MAX_BO]; unsigned nreloc; unsigned reloc_pm4_id[8]; unsigned reloc_bo_id[8]; @@ -212,5 +221,4 @@ enum r600_stype { #define R600_QUERY_SIZE 1 #define R600_QUERY_PM4 128 -void r600_flush_ctx(void *data); #endif |