diff options
Diffstat (limited to 'src/gallium/drivers/r600')
29 files changed, 3595 insertions, 4074 deletions
diff --git a/src/gallium/drivers/r600/Android.mk b/src/gallium/drivers/r600/Android.mk new file mode 100644 index 0000000000..b76a78810f --- /dev/null +++ b/src/gallium/drivers/r600/Android.mk @@ -0,0 +1,43 @@ +ifeq ($(strip $(MESA_BUILD_R600G)),true) + +LOCAL_PATH := $(call my-dir) + +# from Makefile +C_SOURCES = \ + r600_asm.c \ + r600_blit.c \ + r600_buffer.c \ + r600_helper.c \ + r600_pipe.c \ + r600_query.c \ + r600_resource.c \ + r600_shader.c \ + r600_state.c \ + r600_texture.c \ + r700_asm.c \ + evergreen_state.c \ + eg_asm.c \ + r600_translate.c \ + r600_state_common.c + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + $(C_SOURCES) + +LOCAL_CFLAGS := \ + -std=c99 \ + -fvisibility=hidden \ + -Wno-sign-compare + +LOCAL_C_INCLUDES := \ + external/mesa/src/gallium/include \ + external/mesa/src/gallium/auxiliary \ + external/drm \ + external/drm/include/drm + +LOCAL_MODULE := libmesa_pipe_r600 + +include $(BUILD_STATIC_LIBRARY) + +endif # MESA_BUILD_R600G diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript index 3fc1fa94c2..5a5fa6d65f 100644 --- a/src/gallium/drivers/r600/SConscript +++ b/src/gallium/drivers/r600/SConscript @@ -9,7 +9,7 @@ except OSError: Return() env.Append(CPPPATH = [ - '#/include', + '#/include', '#/src/mesa', ]) diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index b79875c7c7..3793b919dd 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -35,15 +35,17 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) switch (cf->inst) { case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3): + case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3): case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) | - S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) | - S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) | - S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache1_bank); + S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) | + S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) | + S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank); bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) | - S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) | - S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) | - S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) | + S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) | + S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) | + S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) | S_SQ_CF_ALU_WORD1_BARRIER(1) | S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); break; @@ -60,7 +62,8 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) | S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) | S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type); - bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) | + bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) | + S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) | @@ -90,37 +93,3 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) } return 0; } - -void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) -{ - struct r600_pipe_state *rstate; - unsigned i = 0; - - if (count > 8) { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(8 - 1); - bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(count - 8 - 1); - } else { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(count - 1); - } - bytecode[i++] = S_SQ_CF_WORD0_ADDR(0); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN) | - S_SQ_CF_WORD1_BARRIER(1); - - rstate = &ve->rstate; - rstate->id = R600_PIPE_STATE_FETCH_SHADER; - rstate->nregs = 0; - r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS, - (r600_bo_offset(ve->fetch_shader)) >> 8, - 0xFFFFFFFF, ve->fetch_shader); -} diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h index ecea1db4f1..cae3888051 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -253,9 +253,13 @@ static inline unsigned r600_tex_dim(unsigned dim) default: case PIPE_TEXTURE_1D: return V_030000_SQ_TEX_DIM_1D; + case PIPE_TEXTURE_1D_ARRAY: + return V_030000_SQ_TEX_DIM_1D_ARRAY; case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: return V_030000_SQ_TEX_DIM_2D; + case PIPE_TEXTURE_2D_ARRAY: + return V_030000_SQ_TEX_DIM_2D_ARRAY; case PIPE_TEXTURE_3D: return V_030000_SQ_TEX_DIM_3D; case PIPE_TEXTURE_CUBE: @@ -289,10 +293,14 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) { switch (format) { /* 8-bit buffers. */ + case PIPE_FORMAT_L4A4_UNORM: + return V_028C70_SWAP_ALT; + case PIPE_FORMAT_A8_UNORM: return V_028C70_SWAP_ALT_REV; case PIPE_FORMAT_I8_UNORM: case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SRGB: case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return V_028C70_SWAP_STD; @@ -313,6 +321,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) return V_028C70_SWAP_STD; case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SRGB: return V_028C70_SWAP_ALT; case PIPE_FORMAT_R8G8_UNORM: return V_028C70_SWAP_STD; @@ -352,9 +361,11 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_R10G10B10X2_SNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return V_028C70_SWAP_STD_REV; + return V_028C70_SWAP_STD; + + case PIPE_FORMAT_B10G10R10A2_UNORM: + return V_028C70_SWAP_ALT; case PIPE_FORMAT_R16G16_UNORM: return V_028C70_SWAP_STD; @@ -362,14 +373,13 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: - // return V_028C70_COLOR_16_16_16_16; case PIPE_FORMAT_R16G16B16A16_FLOAT: - // return V_028C70_COLOR_16_16_16_16_FLOAT; /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_FLOAT: - // return V_028C70_COLOR_32_32_32_32_FLOAT; - return 0; + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + return V_028C70_SWAP_STD; default: R600_ERR("unsupported colorswap format %d\n", format); return ~0; @@ -381,9 +391,13 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) { switch (format) { /* 8-bit buffers. */ + case PIPE_FORMAT_L4A4_UNORM: + return V_028C70_COLOR_4_4; + case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SRGB: case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return V_028C70_COLOR_8; @@ -404,6 +418,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) return V_028C70_COLOR_16; case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SRGB: case PIPE_FORMAT_R8G8_UNORM: return V_028C70_COLOR_8_8; @@ -430,7 +445,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R10G10B10X2_SNORM: case PIPE_FORMAT_B10G10R10A2_UNORM: case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return V_028C70_COLOR_10_10_10_2; + return V_028C70_COLOR_2_10_10_10; case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_USCALED: @@ -471,6 +486,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) return V_028C70_COLOR_32_32; /* 128-bit buffers. */ + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + return V_028C70_COLOR_32_32_32_32; case PIPE_FORMAT_R32G32B32_FLOAT: return V_028C70_COLOR_32_32_32_FLOAT; case PIPE_FORMAT_R32G32B32A32_FLOAT: @@ -485,9 +503,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) } } -static INLINE boolean r600_is_sampler_format_supported(enum pipe_format format) +static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) { - return r600_translate_texformat(format, NULL, NULL, NULL) != ~0; + return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0; } static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format) @@ -501,144 +519,4 @@ static INLINE boolean r600_is_zs_format_supported(enum pipe_format format) return r600_translate_dbformat(format) != ~0; } -static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format) -{ - return r600_translate_colorformat(format) != ~0; -} - -static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format) -{ - uint32_t result = 0; - const struct util_format_description *desc; - unsigned i; - - desc = util_format_description(format); - if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { - goto out_unknown; - } - - /* Find the first non-VOID channel. */ - for (i = 0; i < 4; i++) { - if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { - break; - } - } - - switch (desc->channel[i].type) { - /* Half-floats, floats, doubles */ - case UTIL_FORMAT_TYPE_FLOAT: - switch (desc->channel[i].size) { - case 16: - switch (desc->nr_channels) { - case 1: - result = FMT_16_FLOAT; - break; - case 2: - result = FMT_16_16_FLOAT; - break; - case 3: - result = FMT_16_16_16_FLOAT; - break; - case 4: - result = FMT_16_16_16_16_FLOAT; - break; - } - break; - case 32: - switch (desc->nr_channels) { - case 1: - result = FMT_32_FLOAT; - break; - case 2: - result = FMT_32_32_FLOAT; - break; - case 3: - result = FMT_32_32_32_FLOAT; - break; - case 4: - result = FMT_32_32_32_32_FLOAT; - break; - } - break; - default: - goto out_unknown; - } - break; - /* Unsigned ints */ - case UTIL_FORMAT_TYPE_UNSIGNED: - /* Signed ints */ - case UTIL_FORMAT_TYPE_SIGNED: - switch (desc->channel[i].size) { - case 8: - switch (desc->nr_channels) { - case 1: - result = FMT_8; - break; - case 2: - result = FMT_8_8; - break; - case 3: -// result = V_038008_FMT_8_8_8; /* fails piglit draw-vertices test */ -// break; - case 4: - result = FMT_8_8_8_8; - break; - } - break; - case 16: - switch (desc->nr_channels) { - case 1: - result = FMT_16; - break; - case 2: - result = FMT_16_16; - break; - case 3: -// result = V_038008_FMT_16_16_16; /* fails piglit draw-vertices test */ -// break; - case 4: - result = FMT_16_16_16_16; - break; - } - break; - case 32: - switch (desc->nr_channels) { - case 1: - result = FMT_32; - break; - case 2: - result = FMT_32_32; - break; - case 3: - result = FMT_32_32_32; - break; - case 4: - result = FMT_32_32_32_32; - break; - } - break; - default: - goto out_unknown; - } - break; - default: - goto out_unknown; - } - - result = S_030008_DATA_FORMAT(result); - - if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { - result |= S_030008_FORMAT_COMP_ALL(1); - } - if (desc->channel[i].normalized) { - result |= S_030008_NUM_FORMAT_ALL(0); - } else { - result |= S_030008_NUM_FORMAT_ALL(2); - } - return result; -out_unknown: - R600_ERR("unsupported vertex format %s\n", util_format_name(format)); - return ~0; -} - #endif diff --git a/src/gallium/drivers/r600/eg_states_inc.h b/src/gallium/drivers/r600/eg_states_inc.h deleted file mode 100644 index 1379c11291..0000000000 --- a/src/gallium/drivers/r600/eg_states_inc.h +++ /dev/null @@ -1,458 +0,0 @@ -/* This file is autogenerated from eg_states.h - do not edit directly */ -/* autogenerating script is gen_eg_states.py */ - -/* EG_CONFIG */ -#define EG_CONFIG__SQ_CONFIG 0 -#define EG_CONFIG__SPI_CONFIG_CNTL 1 -#define EG_CONFIG__SPI_CONFIG_CNTL_1 2 -#define EG_CONFIG__SQ_GPR_RESOURCE_MGMT_1 3 -#define EG_CONFIG__SQ_GPR_RESOURCE_MGMT_2 4 -#define EG_CONFIG__SQ_GPR_RESOURCE_MGMT_3 5 -#define EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_1 6 -#define EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_2 7 -#define EG_CONFIG__SQ_STACK_RESOURCE_MGMT_1 8 -#define EG_CONFIG__SQ_STACK_RESOURCE_MGMT_2 9 -#define EG_CONFIG__SQ_STACK_RESOURCE_MGMT_3 10 -#define EG_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ 11 -#define EG_CONFIG__PA_CL_ENHANCE 12 -#define EG_CONFIG__SQ_DYN_GPR_RESOURCE_LIMIT_1 13 -#define EG_CONFIG__SQ_LDS_ALLOC_PS 14 -#define EG_CONFIG__SX_MISC 15 -#define EG_CONFIG__SQ_ESGS_RING_ITEMSIZE 16 -#define EG_CONFIG__SQ_GSVS_RING_ITEMSIZE 17 -#define EG_CONFIG__SQ_ESTMP_RING_ITEMSIZE 18 -#define EG_CONFIG__SQ_GSTMP_RING_ITEMSIZE 19 -#define EG_CONFIG__SQ_VSTMP_RING_ITEMSIZE 20 -#define EG_CONFIG__SQ_PSTMP_RING_ITEMSIZE 21 -#define EG_CONFIG__SQ_GS_VERT_ITEMSIZE 22 -#define EG_CONFIG__SQ_GS_VERT_ITEMSIZE_1 23 -#define EG_CONFIG__SQ_GS_VERT_ITEMSIZE_2 24 -#define EG_CONFIG__SQ_GS_VERT_ITEMSIZE_3 25 -#define EG_CONFIG__VGT_OUTPUT_PATH_CNTL 26 -#define EG_CONFIG__VGT_HOS_CNTL 27 -#define EG_CONFIG__VGT_HOS_MAX_TESS_LEVEL 28 -#define EG_CONFIG__VGT_HOS_MIN_TESS_LEVEL 29 -#define EG_CONFIG__VGT_HOS_REUSE_DEPTH 30 -#define EG_CONFIG__VGT_GROUP_PRIM_TYPE 31 -#define EG_CONFIG__VGT_GROUP_FIRST_DECR 32 -#define EG_CONFIG__VGT_GROUP_DECR 33 -#define EG_CONFIG__VGT_GROUP_VECT_0_CNTL 34 -#define EG_CONFIG__VGT_GROUP_VECT_1_CNTL 35 -#define EG_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL 36 -#define EG_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL 37 -#define EG_CONFIG__VGT_GS_MODE 38 -#define EG_CONFIG__PA_SC_MODE_CNTL_0 39 -#define EG_CONFIG__PA_SC_MODE_CNTL_1 40 -#define EG_CONFIG__VGT_REUSE_OFF 41 -#define EG_CONFIG__VGT_VTX_CNT_EN 42 -#define EG_CONFIG__VGT_SHADER_STAGES_EN 43 -#define EG_CONFIG__VGT_STRMOUT_CONFIG 44 -#define EG_CONFIG__VGT_STRMOUT_BUFFER_CONFIG 45 -#define EG_CONFIG_SIZE 46 -#define EG_CONFIG_PM4 128 - -/* EG_CB_CNTL */ -#define EG_CB_CNTL__CB_TARGET_MASK 0 -#define EG_CB_CNTL__CB_SHADER_MASK 1 -#define EG_CB_CNTL__CB_COLOR_CONTROL 2 -#define EG_CB_CNTL__PA_SC_AA_CONFIG 3 -#define EG_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX 4 -#define EG_CB_CNTL__PA_SC_AA_MASK 5 -#define EG_CB_CNTL_SIZE 6 -#define EG_CB_CNTL_PM4 128 - -/* EG_RASTERIZER */ -#define EG_RASTERIZER__SPI_INTERP_CONTROL_0 0 -#define EG_RASTERIZER__PA_CL_CLIP_CNTL 1 -#define EG_RASTERIZER__PA_SU_SC_MODE_CNTL 2 -#define EG_RASTERIZER__PA_CL_VS_OUT_CNTL 3 -#define EG_RASTERIZER__PA_CL_NANINF_CNTL 4 -#define EG_RASTERIZER__PA_SU_POINT_SIZE 5 -#define EG_RASTERIZER__PA_SU_POINT_MINMAX 6 -#define EG_RASTERIZER__PA_SU_LINE_CNTL 7 -#define EG_RASTERIZER__PA_SC_MPASS_PS_CNTL 8 -#define EG_RASTERIZER__PA_SC_LINE_CNTL 9 -#define EG_RASTERIZER__PA_SU_VTX_CNTL 10 -#define EG_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ 11 -#define EG_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ 12 -#define EG_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ 13 -#define EG_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ 14 -#define EG_RASTERIZER__PA_SU_POLY_OFFSET_DB_FMT_CNTL 15 -#define EG_RASTERIZER__PA_SU_POLY_OFFSET_CLAMP 16 -#define EG_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_SCALE 17 -#define EG_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET 18 -#define EG_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE 19 -#define EG_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET 20 -#define EG_RASTERIZER_SIZE 21 -#define EG_RASTERIZER_PM4 128 - -/* EG_VIEWPORT */ -#define EG_VIEWPORT__PA_SC_VPORT_ZMIN_0 0 -#define EG_VIEWPORT__PA_SC_VPORT_ZMAX_0 1 -#define EG_VIEWPORT__PA_CL_VPORT_XSCALE_0 2 -#define EG_VIEWPORT__PA_CL_VPORT_YSCALE_0 3 -#define EG_VIEWPORT__PA_CL_VPORT_ZSCALE_0 4 -#define EG_VIEWPORT__PA_CL_VPORT_XOFFSET_0 5 -#define EG_VIEWPORT__PA_CL_VPORT_YOFFSET_0 6 -#define EG_VIEWPORT__PA_CL_VPORT_ZOFFSET_0 7 -#define EG_VIEWPORT__PA_CL_VTE_CNTL 8 -#define EG_VIEWPORT_SIZE 9 -#define EG_VIEWPORT_PM4 128 - -/* EG_SCISSOR */ -#define EG_SCISSOR__PA_SC_SCREEN_SCISSOR_TL 0 -#define EG_SCISSOR__PA_SC_SCREEN_SCISSOR_BR 1 -#define EG_SCISSOR__PA_SC_WINDOW_OFFSET 2 -#define EG_SCISSOR__PA_SC_WINDOW_SCISSOR_TL 3 -#define EG_SCISSOR__PA_SC_WINDOW_SCISSOR_BR 4 -#define EG_SCISSOR__PA_SC_CLIPRECT_RULE 5 -#define EG_SCISSOR__PA_SC_CLIPRECT_0_TL 6 -#define EG_SCISSOR__PA_SC_CLIPRECT_0_BR 7 -#define EG_SCISSOR__PA_SC_CLIPRECT_1_TL 8 -#define EG_SCISSOR__PA_SC_CLIPRECT_1_BR 9 -#define EG_SCISSOR__PA_SC_CLIPRECT_2_TL 10 -#define EG_SCISSOR__PA_SC_CLIPRECT_2_BR 11 -#define EG_SCISSOR__PA_SC_CLIPRECT_3_TL 12 -#define EG_SCISSOR__PA_SC_CLIPRECT_3_BR 13 -#define EG_SCISSOR__PA_SC_EDGERULE 14 -#define EG_SCISSOR__PA_SC_GENERIC_SCISSOR_TL 15 -#define EG_SCISSOR__PA_SC_GENERIC_SCISSOR_BR 16 -#define EG_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL 17 -#define EG_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR 18 -#define EG_SCISSOR__PA_SU_HARDWARE_SCREEN_OFFSET 19 -#define EG_SCISSOR_SIZE 20 -#define EG_SCISSOR_PM4 128 - -/* EG_BLEND */ -#define EG_BLEND__CB_BLEND_RED 0 -#define EG_BLEND__CB_BLEND_GREEN 1 -#define EG_BLEND__CB_BLEND_BLUE 2 -#define EG_BLEND__CB_BLEND_ALPHA 3 -#define EG_BLEND__CB_BLEND0_CONTROL 4 -#define EG_BLEND__CB_BLEND1_CONTROL 5 -#define EG_BLEND__CB_BLEND2_CONTROL 6 -#define EG_BLEND__CB_BLEND3_CONTROL 7 -#define EG_BLEND__CB_BLEND4_CONTROL 8 -#define EG_BLEND__CB_BLEND5_CONTROL 9 -#define EG_BLEND__CB_BLEND6_CONTROL 10 -#define EG_BLEND__CB_BLEND7_CONTROL 11 -#define EG_BLEND_SIZE 12 -#define EG_BLEND_PM4 128 - -/* EG_DSA */ -#define EG_DSA__DB_STENCIL_CLEAR 0 -#define EG_DSA__DB_DEPTH_CLEAR 1 -#define EG_DSA__SX_ALPHA_TEST_CONTROL 2 -#define EG_DSA__DB_STENCILREFMASK 3 -#define EG_DSA__DB_STENCILREFMASK_BF 4 -#define EG_DSA__SX_ALPHA_REF 5 -#define EG_DSA__SPI_FOG_CNTL 6 -#define EG_DSA__DB_DEPTH_CONTROL 7 -#define EG_DSA__DB_SHADER_CONTROL 8 -#define EG_DSA__DB_RENDER_CONTROL 9 -#define EG_DSA__DB_COUNT_CONTROL 10 -#define EG_DSA__DB_RENDER_OVERRIDE 11 -#define EG_DSA__DB_RENDER_OVERRIDE2 12 -#define EG_DSA__DB_SRESULTS_COMPARE_STATE0 13 -#define EG_DSA__DB_SRESULTS_COMPARE_STATE1 14 -#define EG_DSA__DB_PRELOAD_CONTROL 15 -#define EG_DSA__DB_ALPHA_TO_MASK 16 -#define EG_DSA_SIZE 17 -#define EG_DSA_PM4 128 - -/* EG_VS_SHADER */ -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_0 0 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_1 1 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_2 2 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_3 3 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_4 4 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_5 5 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_6 6 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_7 7 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_8 8 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_9 9 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_10 10 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_11 11 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_12 12 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_13 13 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_14 14 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_15 15 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_16 16 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_17 17 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_18 18 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_19 19 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_20 20 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_21 21 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_22 22 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_23 23 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_24 24 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_25 25 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_26 26 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_27 27 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_28 28 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_29 29 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_30 30 -#define EG_VS_SHADER__SQ_VTX_SEMANTIC_31 31 -#define EG_VS_SHADER__SPI_VS_OUT_ID_0 32 -#define EG_VS_SHADER__SPI_VS_OUT_ID_1 33 -#define EG_VS_SHADER__SPI_VS_OUT_ID_2 34 -#define EG_VS_SHADER__SPI_VS_OUT_ID_3 35 -#define EG_VS_SHADER__SPI_VS_OUT_ID_4 36 -#define EG_VS_SHADER__SPI_VS_OUT_ID_5 37 -#define EG_VS_SHADER__SPI_VS_OUT_ID_6 38 -#define EG_VS_SHADER__SPI_VS_OUT_ID_7 39 -#define EG_VS_SHADER__SPI_VS_OUT_ID_8 40 -#define EG_VS_SHADER__SPI_VS_OUT_ID_9 41 -#define EG_VS_SHADER__SPI_VS_OUT_CONFIG 42 -#define EG_VS_SHADER__SQ_PGM_START_VS 43 -#define EG_VS_SHADER__SQ_PGM_RESOURCES_VS 44 -#define EG_VS_SHADER__SQ_PGM_RESOURCES_2_VS 45 -#define EG_VS_SHADER__SQ_PGM_START_FS 46 -#define EG_VS_SHADER__SQ_PGM_RESOURCES_FS 47 -#define EG_VS_SHADER_SIZE 48 -#define EG_VS_SHADER_PM4 128 - -/* EG_PS_SHADER */ -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_0 0 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_1 1 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_2 2 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_3 3 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_4 4 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_5 5 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_6 6 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_7 7 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_8 8 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_9 9 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_10 10 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_11 11 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_12 12 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_13 13 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_14 14 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_15 15 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_16 16 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_17 17 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_18 18 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_19 19 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_20 20 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_21 21 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_22 22 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_23 23 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_24 24 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_25 25 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_26 26 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_27 27 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_28 28 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_29 29 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_30 30 -#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_31 31 -#define EG_PS_SHADER__SPI_THREAD_GROUPING 32 -#define EG_PS_SHADER__SPI_PS_IN_CONTROL_0 33 -#define EG_PS_SHADER__SPI_PS_IN_CONTROL_1 34 -#define EG_PS_SHADER__SPI_INPUT_Z 35 -#define EG_PS_SHADER__SPI_BARYC_CNTL 36 -#define EG_PS_SHADER__SPI_PS_IN_CONTROL_2 37 -#define EG_PS_SHADER__SPI_COMPUTE_INPUT_CNTL 38 -#define EG_PS_SHADER__SQ_PGM_START_PS 39 -#define EG_PS_SHADER__SQ_PGM_RESOURCES_PS 40 -#define EG_PS_SHADER__SQ_PGM_RESOURCES_2_PS 41 -#define EG_PS_SHADER__SQ_PGM_EXPORTS_PS 42 -#define EG_PS_SHADER_SIZE 43 -#define EG_PS_SHADER_PM4 128 - -/* EG_UCP */ -#define EG_UCP__PA_CL_UCP0_X 0 -#define EG_UCP__PA_CL_UCP0_Y 1 -#define EG_UCP__PA_CL_UCP0_Z 2 -#define EG_UCP__PA_CL_UCP0_W 3 -#define EG_UCP__PA_CL_UCP1_X 4 -#define EG_UCP__PA_CL_UCP1_Y 5 -#define EG_UCP__PA_CL_UCP1_Z 6 -#define EG_UCP__PA_CL_UCP1_W 7 -#define EG_UCP__PA_CL_UCP2_X 8 -#define EG_UCP__PA_CL_UCP2_Y 9 -#define EG_UCP__PA_CL_UCP2_Z 10 -#define EG_UCP__PA_CL_UCP2_W 11 -#define EG_UCP__PA_CL_UCP3_X 12 -#define EG_UCP__PA_CL_UCP3_Y 13 -#define EG_UCP__PA_CL_UCP3_Z 14 -#define EG_UCP__PA_CL_UCP3_W 15 -#define EG_UCP__PA_CL_UCP4_X 16 -#define EG_UCP__PA_CL_UCP4_Y 17 -#define EG_UCP__PA_CL_UCP4_Z 18 -#define EG_UCP__PA_CL_UCP4_W 19 -#define EG_UCP__PA_CL_UCP5_X 20 -#define EG_UCP__PA_CL_UCP5_Y 21 -#define EG_UCP__PA_CL_UCP5_Z 22 -#define EG_UCP__PA_CL_UCP5_W 23 -#define EG_UCP_SIZE 24 -#define EG_UCP_PM4 128 - -/* EG_VS_CBUF */ -#define EG_VS_CBUF__ALU_CONST_BUFFER_SIZE_VS_0 0 -#define EG_VS_CBUF__ALU_CONST_CACHE_VS_0 1 -#define EG_VS_CBUF_SIZE 2 -#define EG_VS_CBUF_PM4 128 - -/* EG_PS_CBUF */ -#define EG_PS_CBUF__ALU_CONST_BUFFER_SIZE_PS_0 0 -#define EG_PS_CBUF__ALU_CONST_CACHE_PS_0 1 -#define EG_PS_CBUF_SIZE 2 -#define EG_PS_CBUF_PM4 128 - -/* EG_PS_RESOURCE */ -#define EG_PS_RESOURCE__RESOURCE0_WORD0 0 -#define EG_PS_RESOURCE__RESOURCE0_WORD1 1 -#define EG_PS_RESOURCE__RESOURCE0_WORD2 2 -#define EG_PS_RESOURCE__RESOURCE0_WORD3 3 -#define EG_PS_RESOURCE__RESOURCE0_WORD4 4 -#define EG_PS_RESOURCE__RESOURCE0_WORD5 5 -#define EG_PS_RESOURCE__RESOURCE0_WORD6 6 -#define EG_PS_RESOURCE__RESOURCE0_WORD7 7 -#define EG_PS_RESOURCE_SIZE 8 -#define EG_PS_RESOURCE_PM4 128 - -/* EG_VS_RESOURCE */ -#define EG_VS_RESOURCE__RESOURCE160_WORD0 0 -#define EG_VS_RESOURCE__RESOURCE160_WORD1 1 -#define EG_VS_RESOURCE__RESOURCE160_WORD2 2 -#define EG_VS_RESOURCE__RESOURCE160_WORD3 3 -#define EG_VS_RESOURCE__RESOURCE160_WORD4 4 -#define EG_VS_RESOURCE__RESOURCE160_WORD5 5 -#define EG_VS_RESOURCE__RESOURCE160_WORD6 6 -#define EG_VS_RESOURCE__RESOURCE160_WORD7 7 -#define EG_VS_RESOURCE_SIZE 8 -#define EG_VS_RESOURCE_PM4 128 - -/* EG_FS_RESOURCE */ -#define EG_FS_RESOURCE__RESOURCE320_WORD0 0 -#define EG_FS_RESOURCE__RESOURCE320_WORD1 1 -#define EG_FS_RESOURCE__RESOURCE320_WORD2 2 -#define EG_FS_RESOURCE__RESOURCE320_WORD3 3 -#define EG_FS_RESOURCE__RESOURCE320_WORD4 4 -#define EG_FS_RESOURCE__RESOURCE320_WORD5 5 -#define EG_FS_RESOURCE__RESOURCE320_WORD6 6 -#define EG_FS_RESOURCE__RESOURCE320_WORD7 7 -#define EG_FS_RESOURCE_SIZE 8 -#define EG_FS_RESOURCE_PM4 128 - -/* EG_GS_RESOURCE */ -#define EG_GS_RESOURCE__RESOURCE336_WORD0 0 -#define EG_GS_RESOURCE__RESOURCE336_WORD1 1 -#define EG_GS_RESOURCE__RESOURCE336_WORD2 2 -#define EG_GS_RESOURCE__RESOURCE336_WORD3 3 -#define EG_GS_RESOURCE__RESOURCE336_WORD4 4 -#define EG_GS_RESOURCE__RESOURCE336_WORD5 5 -#define EG_GS_RESOURCE__RESOURCE336_WORD6 6 -#define EG_GS_RESOURCE__RESOURCE336_WORD7 7 -#define EG_GS_RESOURCE_SIZE 8 -#define EG_GS_RESOURCE_PM4 128 - -/* EG_PS_SAMPLER */ -#define EG_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0 0 -#define EG_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0 1 -#define EG_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0 2 -#define EG_PS_SAMPLER_SIZE 3 -#define EG_PS_SAMPLER_PM4 128 - -/* EG_VS_SAMPLER */ -#define EG_VS_SAMPLER__SQ_TEX_SAMPLER_WORD0_18 0 -#define EG_VS_SAMPLER__SQ_TEX_SAMPLER_WORD1_18 1 -#define EG_VS_SAMPLER__SQ_TEX_SAMPLER_WORD2_18 2 -#define EG_VS_SAMPLER_SIZE 3 -#define EG_VS_SAMPLER_PM4 128 - -/* EG_GS_SAMPLER */ -#define EG_GS_SAMPLER__SQ_TEX_SAMPLER_WORD0_36 0 -#define EG_GS_SAMPLER__SQ_TEX_SAMPLER_WORD1_36 1 -#define EG_GS_SAMPLER__SQ_TEX_SAMPLER_WORD2_36 2 -#define EG_GS_SAMPLER_SIZE 3 -#define EG_GS_SAMPLER_PM4 128 - -/* EG_PS_SAMPLER_BORDER */ -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_INDEX 0 -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED 1 -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN 2 -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE 3 -#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_ALPHA 4 -#define EG_PS_SAMPLER_BORDER_SIZE 5 -#define EG_PS_SAMPLER_BORDER_PM4 128 - -/* EG_VS_SAMPLER_BORDER */ -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_INDEX 0 -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_RED 1 -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_GREEN 2 -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_BLUE 3 -#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_ALPHA 4 -#define EG_VS_SAMPLER_BORDER_SIZE 5 -#define EG_VS_SAMPLER_BORDER_PM4 128 - -/* EG_GS_SAMPLER_BORDER */ -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_INDEX 0 -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_RED 1 -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_GREEN 2 -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_BLUE 3 -#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_ALPHA 4 -#define EG_GS_SAMPLER_BORDER_SIZE 5 -#define EG_GS_SAMPLER_BORDER_PM4 128 - -/* EG_CB */ -#define EG_CB__CB_COLOR0_BASE 0 -#define EG_CB__CB_COLOR0_PITCH 1 -#define EG_CB__CB_COLOR0_SLICE 2 -#define EG_CB__CB_COLOR0_VIEW 3 -#define EG_CB__CB_COLOR0_INFO 4 -#define EG_CB__CB_COLOR0_ATTRIB 5 -#define EG_CB__CB_COLOR0_DIM 6 -#define EG_CB_SIZE 7 -#define EG_CB_PM4 128 - -/* EG_DB */ -#define EG_DB__DB_HTILE_DATA_BASE 0 -#define EG_DB__DB_Z_INFO 1 -#define EG_DB__DB_STENCIL_INFO 2 -#define EG_DB__DB_DEPTH_SIZE 3 -#define EG_DB__DB_DEPTH_SLICE 4 -#define EG_DB__DB_DEPTH_VIEW 5 -#define EG_DB__DB_HTILE_SURFACE 6 -#define EG_DB__DB_Z_READ_BASE 7 -#define EG_DB__DB_STENCIL_READ_BASE 8 -#define EG_DB__DB_Z_WRITE_BASE 9 -#define EG_DB__DB_STENCIL_WRITE_BASE 10 -#define EG_DB_SIZE 11 -#define EG_DB_PM4 128 - -/* EG_VGT */ -#define EG_VGT__VGT_PRIMITIVE_TYPE 0 -#define EG_VGT__VGT_MAX_VTX_INDX 1 -#define EG_VGT__VGT_MIN_VTX_INDX 2 -#define EG_VGT__VGT_INDX_OFFSET 3 -#define EG_VGT__VGT_DMA_INDEX_TYPE 4 -#define EG_VGT__VGT_PRIMITIVEID_EN 5 -#define EG_VGT__VGT_DMA_NUM_INSTANCES 6 -#define EG_VGT__VGT_MULTI_PRIM_IB_RESET_EN 7 -#define EG_VGT__VGT_INSTANCE_STEP_RATE_0 8 -#define EG_VGT__VGT_INSTANCE_STEP_RATE_1 9 -#define EG_VGT_SIZE 10 -#define EG_VGT_PM4 128 - -/* EG_DRAW */ -#define EG_DRAW__VGT_NUM_INDICES 0 -#define EG_DRAW__VGT_DMA_BASE_HI 1 -#define EG_DRAW__VGT_DMA_BASE 2 -#define EG_DRAW__VGT_DRAW_INITIATOR 3 -#define EG_DRAW_SIZE 4 -#define EG_DRAW_PM4 128 - -/* EG_VGT_EVENT */ -#define EG_VGT_EVENT__VGT_EVENT_INITIATOR 0 -#define EG_VGT_EVENT_SIZE 1 -#define EG_VGT_EVENT_PM4 128 - -/* EG_CB_FLUSH */ -#define EG_CB_FLUSH_SIZE 0 -#define EG_CB_FLUSH_PM4 128 - -/* EG_DB_FLUSH */ -#define EG_DB_FLUSH_SIZE 0 -#define EG_DB_FLUSH_PM4 128 - diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 9e1a5e1f98..77432661b6 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -103,7 +103,7 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, } blend->cb_target_mask = target_mask; r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, - color_control, 0xFFFFFFFF, NULL); + color_control, 0xFFFFFFFD, NULL); r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL); for (int i = 0; i < 8; i++) { @@ -150,10 +150,6 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_DSA; /* depth TODO some of those db_shader_control field depend on shader adjust mask & add it to shader */ - /* db_shader_control is 0xFFFFFFBE as Z_EXPORT_ENABLE (bit 0) will be - * set by fragment shader if it export Z and KILL_ENABLE (bit 6) will - * be set if shader use texkill instruction - */ db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); stencil_ref_mask = 0; stencil_ref_mask_bf = 0; @@ -210,7 +206,10 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBE, NULL); + /* The DB_SHADER_CONTROL mask is 0xFFFFFFBC since Z_EXPORT_ENABLE, + * STENCIL_EXPORT_ENABLE and KILL_ENABLE are controlled by + * evergreen_pipe_shader_ps().*/ + r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL); r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_02800C_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0, 0xFFFFFFFF, NULL); @@ -305,11 +304,16 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx, { struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); union util_color uc; + uint32_t coord_trunc = 0; if (rstate == NULL) { return NULL; } + if ((state->mag_img_filter == PIPE_TEX_FILTER_NEAREST) || + (state->min_img_filter == PIPE_TEX_FILTER_NEAREST)) + coord_trunc = 1; + rstate->id = R600_PIPE_STATE_SAMPLER; util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0, @@ -328,6 +332,7 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | + S_03C008_MC_COORD_TRUNCATE(coord_trunc) | S_03C008_TYPE(1), 0xFFFFFFFF, NULL); @@ -351,7 +356,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte struct r600_resource *rbuffer; unsigned format; uint32_t word4 = 0, yuv_format = 0, pitch = 0; - unsigned char swizzle[4]; + unsigned char swizzle[4], array_mode = 0, tile_type = 0; struct r600_bo *bo[2]; if (resource == NULL) @@ -370,7 +375,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte swizzle[1] = state->swizzle_g; swizzle[2] = state->swizzle_b; swizzle[3] = state->swizzle_a; - format = r600_translate_texformat(state->format, + format = r600_translate_texformat(ctx->screen, state->format, swizzle, &word4, &yuv_format); if (format == ~0) { @@ -380,36 +385,43 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte if (desc == NULL) { R600_ERR("unknow format %d\n", state->format); } - tmp = (struct r600_resource_texture*)texture; + tmp = (struct r600_resource_texture *)texture; + if (tmp->depth && !tmp->is_flushing_texture) { + r600_texture_depth_flush(ctx, texture, TRUE); + tmp = tmp->flushed_depth_texture; + } + + if (tmp->force_int_type) { + word4 &= C_030010_NUM_FORMAT_ALL; + word4 |= S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_INT); + } + rbuffer = &tmp->resource; bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; - /* FIXME depth texture decompression */ - if (tmp->depth) { - r600_texture_depth_flush(ctx, texture); - tmp = (struct r600_resource_texture*)texture; - rbuffer = &tmp->flushed_depth_texture->resource; - bo[0] = rbuffer->bo; - bo[1] = rbuffer->bo; - } - pitch = align(tmp->pitch_in_pixels[0], 8); + + pitch = align(tmp->pitch_in_blocks[0] * util_format_get_blockwidth(state->format), 8); + array_mode = tmp->array_mode[0]; + tile_type = tmp->tile_type; /* FIXME properly handle first level != 0 */ r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, S_030000_DIM(r600_tex_dim(texture->target)) | S_030000_PITCH((pitch / 8) - 1) | + S_030000_NON_DISP_TILING_ORDER(tile_type) | S_030000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, S_030004_TEX_HEIGHT(texture->height0 - 1) | - S_030004_TEX_DEPTH(texture->depth0 - 1), + S_030004_TEX_DEPTH(texture->depth0 - 1) | + S_030004_ARRAY_MODE(array_mode), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]); r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, - word4 | S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_NORM) | - S_030010_SRF_MODE_ALL(V_030010_SFR_MODE_NO_ZERO) | + word4 | + S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_NO_ZERO) | S_030010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, S_030014_LAST_LEVEL(state->u.tex.last_level) | @@ -431,7 +443,8 @@ static void evergreen_set_vs_sampler_view(struct pipe_context *ctx, unsigned cou for (int i = 0; i < count; i++) { if (resource[i]) { - evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i); + evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, + i + R600_MAX_CONST_BUFFERS); } } } @@ -446,9 +459,11 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou for (i = 0; i < count; i++) { if (&rctx->ps_samplers.views[i]->base != views[i]) { if (resource[i]) - evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, + i + R600_MAX_CONST_BUFFERS); else - evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, + i + R600_MAX_CONST_BUFFERS); pipe_sampler_view_reference( (struct pipe_sampler_view **)&rctx->ps_samplers.views[i], @@ -457,7 +472,8 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou } for (i = count; i < NUM_TEX_UNITS; i++) { if (rctx->ps_samplers.views[i]) { - evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, + i + R600_MAX_CONST_BUFFERS); pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL); } } @@ -638,11 +654,19 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state unsigned color_info; unsigned format, swap, ntype; unsigned offset; + unsigned tile_type; const struct util_format_description *desc; struct r600_bo *bo[3]; + int i; surf = (struct r600_surface *)state->cbufs[cb]; rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; + + if (rtex->depth && !rtex->is_flushing_texture) { + r600_texture_depth_flush(&rctx->context, state->cbufs[cb]->texture, TRUE); + rtex = rtex->flushed_depth_texture; + } + rbuffer = &rtex->resource; bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; @@ -651,21 +675,43 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state /* XXX quite sure for dx10+ hw don't need any offset hacks */ offset = r600_texture_get_offset((struct r600_resource_texture *)state->cbufs[cb]->texture, level, state->cbufs[cb]->u.tex.first_layer); - pitch = rtex->pitch_in_pixels[level] / 8 - 1; - slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; + pitch = rtex->pitch_in_blocks[level] / 8 - 1; + slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1; ntype = 0; - desc = util_format_description(rtex->resource.base.b.format); + desc = util_format_description(surf->base.format); if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) ntype = V_028C70_NUMBER_SRGB; - format = r600_translate_colorformat(rtex->resource.base.b.format); - swap = r600_translate_colorswap(rtex->resource.base.b.format); + format = r600_translate_colorformat(surf->base.format); + swap = r600_translate_colorswap(surf->base.format); + + /* disable when gallium grows int textures */ + if ((format == FMT_32_32_32_32 || format == FMT_16_16_16_16) && rtex->force_int_type) + ntype = 4; + color_info = S_028C70_FORMAT(format) | S_028C70_COMP_SWAP(swap) | + S_028C70_ARRAY_MODE(rtex->array_mode[level]) | S_028C70_BLEND_CLAMP(1) | S_028C70_NUMBER_TYPE(ntype); - if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) - color_info |= S_028C70_SOURCE_FORMAT(1); + + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + /* we can only set the export size if any thing is snorm/unorm component is > 11 bits, + if we aren't a float, sint or uint */ + if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS && + desc->channel[i].size < 12 && desc->channel[i].type != UTIL_FORMAT_TYPE_FLOAT && + ntype != 4 && ntype != 5) + color_info |= S_028C70_SOURCE_FORMAT(V_028C70_EXPORT_4C_16BPC); + + if (rtex->array_mode[level] > V_028C70_ARRAY_LINEAR_ALIGNED) { + tile_type = rtex->tile_type; + } else /* workaround for linear buffers */ + tile_type = 1; /* FIXME handle enabling of CB beyond BASE8 which has different offset */ r600_pipe_state_add_reg(rstate, @@ -690,7 +736,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C, - S_028C74_NON_DISP_TILING_ORDER(1), + S_028C74_NON_DISP_TILING_ORDER(tile_type), 0xFFFFFFFF, bo[0]); } @@ -711,17 +757,14 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state surf = (struct r600_surface *)state->zsbuf; rtex = (struct r600_resource_texture*)state->zsbuf->texture; - rtex->tiled = 1; - rtex->array_mode[level] = 2; - rtex->tile_type = 1; - rtex->depth = 1; + rbuffer = &rtex->resource; /* XXX quite sure for dx10+ hw don't need any offset hacks */ offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture, level, state->zsbuf->u.tex.first_layer); - pitch = rtex->pitch_in_pixels[level] / 8 - 1; - slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; + pitch = rtex->pitch_in_blocks[level] / 8 - 1; + slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1; format = r600_translate_dbformat(state->zsbuf->texture->format); stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format); @@ -770,8 +813,6 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, util_copy_framebuffer_state(&rctx->framebuffer, state); - rctx->pframebuffer = &rctx->framebuffer; - /* build states */ for (int i = 0; i < state->nr_cbufs; i++) { evergreen_cb(rctx, rstate, state, i); @@ -839,48 +880,6 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, } } -static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, - struct pipe_resource *buffer) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_resource *rbuffer = (struct r600_resource*)buffer; - - /* Note that the state tracker can unbind constant buffers by - * passing NULL here. - */ - if (buffer == NULL) { - return; - } - - switch (shader) { - case PIPE_SHADER_VERTEX: - rctx->vs_const_buffer.nregs = 0; - r600_pipe_state_add_reg(&rctx->vs_const_buffer, - R_028180_ALU_CONST_BUFFER_SIZE_VS_0, - ALIGN_DIVUP(buffer->width0 >> 4, 16), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vs_const_buffer, - R_028980_ALU_CONST_CACHE_VS_0, - (r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); - r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); - break; - case PIPE_SHADER_FRAGMENT: - rctx->ps_const_buffer.nregs = 0; - r600_pipe_state_add_reg(&rctx->ps_const_buffer, - R_028140_ALU_CONST_BUFFER_SIZE_PS_0, - ALIGN_DIVUP(buffer->width0 >> 4, 16), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->ps_const_buffer, - R_028940_ALU_CONST_CACHE_PS_0, - (r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); - r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); - break; - default: - R600_ERR("unsupported %d\n", shader); - return; - } -} - void evergreen_init_state_functions(struct r600_pipe_context *rctx) { rctx->context.create_blend_state = evergreen_create_blend_state; @@ -908,7 +907,7 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx) rctx->context.delete_vs_state = r600_delete_vs_shader; rctx->context.set_blend_color = evergreen_set_blend_color; rctx->context.set_clip_state = evergreen_set_clip_state; - rctx->context.set_constant_buffer = evergreen_set_constant_buffer; + rctx->context.set_constant_buffer = r600_set_constant_buffer; rctx->context.set_fragment_sampler_views = evergreen_set_ps_sampler_view; rctx->context.set_framebuffer_state = evergreen_set_framebuffer_state; rctx->context.set_polygon_stipple = evergreen_set_polygon_stipple; @@ -920,6 +919,7 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx) rctx->context.set_vertex_sampler_views = evergreen_set_vs_sampler_view; rctx->context.set_viewport_state = evergreen_set_viewport_state; rctx->context.sampler_view_destroy = r600_sampler_view_destroy; + rctx->context.redefine_user_buffer = u_default_redefine_user_buffer; } void evergreen_init_config(struct r600_pipe_context *rctx) @@ -1069,12 +1069,76 @@ void evergreen_init_config(struct r600_pipe_context *rctx) num_hs_stack_entries = 42; num_ls_stack_entries = 42; break; + case CHIP_BARTS: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 85; + num_vs_stack_entries = 85; + num_gs_stack_entries = 85; + num_es_stack_entries = 85; + num_hs_stack_entries = 85; + num_ls_stack_entries = 85; + break; + case CHIP_TURKS: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; + case CHIP_CAICOS: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 10; + num_gs_threads = 10; + num_es_threads = 10; + num_hs_threads = 10; + num_ls_threads = 10; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; } tmp = 0x00000000; switch (family) { case CHIP_CEDAR: case CHIP_PALM: + case CHIP_CAICOS: break; default: tmp |= S_008C00_VC_ENABLE(1); @@ -1260,226 +1324,11 @@ void evergreen_polygon_offset_update(struct r600_pipe_context *rctx) } } -static void evergreen_spi_update(struct r600_pipe_context *rctx) -{ - struct r600_pipe_shader *shader = rctx->ps_shader; - struct r600_pipe_state rstate; - struct r600_shader *rshader = &shader->shader; - unsigned i, tmp; - - rstate.nregs = 0; - for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); - if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || - rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || - rshader->input[i].name == TGSI_SEMANTIC_POSITION) { - tmp |= S_028644_FLAT_SHADE(rctx->flatshade); - } - if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && - rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { - tmp |= S_028644_PT_SPRITE_TEX(1); - } - r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); - } - r600_context_pipe_state_set(&rctx->ctx, &rstate); -} - -void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx) -{ - struct r600_pipe_state *rstate; - struct r600_resource *rbuffer; - struct pipe_vertex_buffer *vertex_buffer; - unsigned i, offset; - - /* we don't update until we know vertex elements */ - if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer) - return; - - /* delete previous translated vertex elements */ - if (rctx->tran.new_velems) { - r600_end_vertex_translate(rctx); - } - - if (rctx->vertex_elements->incompatible_layout) { - /* translate rebind new vertex elements so - * return once translated - */ - r600_begin_vertex_translate(rctx); - return; - } - - if (rctx->any_user_vbs) { - r600_upload_user_buffers(rctx); - rctx->any_user_vbs = FALSE; - } - - if (rctx->vertex_elements->vbuffer_need_offset) { - /* one resource per vertex elements */ - rctx->nvs_resource = rctx->vertex_elements->count; - } else { - /* bind vertex buffer once */ - rctx->nvs_resource = rctx->nvertex_buffer; - } - - for (i = 0 ; i < rctx->nvs_resource; i++) { - rstate = &rctx->vs_resource[i]; - rstate->id = R600_PIPE_STATE_RESOURCE; - rstate->nregs = 0; - - if (rctx->vertex_elements->vbuffer_need_offset) { - /* one resource per vertex elements */ - unsigned vbuffer_index; - vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index; - vertex_buffer = &rctx->vertex_buffer[vbuffer_index]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = rctx->vertex_elements->vbuffer_offset[i] + - vertex_buffer->buffer_offset + - r600_bo_offset(rbuffer->bo); - } else { - /* bind vertex buffer once */ - vertex_buffer = &rctx->vertex_buffer[i]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = vertex_buffer->buffer_offset + - r600_bo_offset(rbuffer->bo); - } - - r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, - offset, 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, - rbuffer->size - offset - 1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, - S_030008_STRIDE(vertex_buffer->stride), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, - S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | - S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | - S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) | - S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, - 0xC0000000, 0xFFFFFFFF, NULL); - evergreen_fs_resource_set(&rctx->ctx, rstate, i); - } -} - -int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); -void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_resource *rbuffer; - u32 vgt_dma_index_type, vgt_draw_initiator, mask; - struct r600_draw rdraw; - struct r600_pipe_state vgt; - struct r600_drawl draw; - unsigned prim; - - memset(&draw, 0, sizeof(struct r600_drawl)); - draw.ctx = ctx; - draw.mode = info->mode; - draw.start = info->start; - draw.count = info->count; - if (info->indexed && rctx->index_buffer.buffer) { - draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; - draw.min_index = info->min_index; - draw.max_index = info->max_index; - draw.index_bias = info->index_bias; - - r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer, - &rctx->index_buffer.index_size, - &draw.start, - info->count); - - draw.index_size = rctx->index_buffer.index_size; - pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); - draw.index_buffer_offset = draw.start * draw.index_size; - draw.start = 0; - r600_upload_index_buffer(rctx, &draw); - } else { - draw.index_size = 0; - draw.index_buffer = NULL; - draw.min_index = info->min_index; - draw.max_index = info->max_index; - draw.index_bias = info->start; - } - - switch (draw.index_size) { - case 2: - vgt_draw_initiator = 0; - vgt_dma_index_type = 0; - break; - case 4: - vgt_draw_initiator = 0; - vgt_dma_index_type = 1; - break; - case 0: - vgt_draw_initiator = 2; - vgt_dma_index_type = 0; - break; - default: - R600_ERR("unsupported index size %d\n", draw.index_size); - return; - } - if (r600_conv_pipe_prim(draw.mode, &prim)) - return; - if (unlikely(rctx->ps_shader == NULL)) { - R600_ERR("missing vertex shader\n"); - return; - } - if (unlikely(rctx->vs_shader == NULL)) { - R600_ERR("missing vertex shader\n"); - return; - } - /* there should be enough input */ - if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) { - R600_ERR("%d resources provided, expecting %d\n", - rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource); - return; - } - - evergreen_spi_update(rctx); - - mask = 0; - for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { - mask |= (0xF << (i * 4)); - } - - vgt.id = R600_PIPE_STATE_VGT; - vgt.nregs = 0; - r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw.index_bias, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw.max_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.min_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL); - r600_context_pipe_state_set(&rctx->ctx, &vgt); - - rdraw.vgt_num_indices = draw.count; - rdraw.vgt_num_instances = 1; - rdraw.vgt_index_type = vgt_dma_index_type; - rdraw.vgt_draw_initiator = vgt_draw_initiator; - rdraw.indices = NULL; - if (draw.index_buffer) { - rbuffer = (struct r600_resource*)draw.index_buffer; - rdraw.indices = rbuffer->bo; - rdraw.indices_bo_offset = draw.index_buffer_offset; - } - evergreen_context_draw(&rctx->ctx, &rdraw); - - pipe_resource_reference(&draw.index_buffer, NULL); -} - void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) { struct r600_pipe_state *rstate = &shader->rstate; struct r600_shader *rshader = &shader->shader; - unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; + unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control; int pos_index = -1, face_index = -1; int ninterp = 0; boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE; @@ -1487,6 +1336,7 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader rstate->nregs = 0; + db_shader_control = 0; for (i = 0; i < rshader->ninput; i++) { /* evergreen NUM_INTERP only contains values interpolated into the LDS, POSITION goes via GPRs from the SC so isn't counted */ @@ -1508,16 +1358,12 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader } for (i = 0; i < rshader->noutput; i++) { if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) - r600_pipe_state_add_reg(rstate, - R_02880C_DB_SHADER_CONTROL, - S_02880C_Z_EXPORT_ENABLE(1), - S_02880C_Z_EXPORT_ENABLE(1), NULL); + db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1); if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) - r600_pipe_state_add_reg(rstate, - R_02880C_DB_SHADER_CONTROL, - S_02880C_STENCIL_EXPORT_ENABLE(1), - S_02880C_STENCIL_EXPORT_ENABLE(1), NULL); + db_shader_control |= S_02880C_STENCIL_EXPORT_ENABLE(1); } + if (rshader->uses_kill) + db_shader_control |= S_02880C_KILL_ENABLE(1); exports_ps = 0; num_cout = 0; @@ -1592,15 +1438,15 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader r600_pipe_state_add_reg(rstate, R_02884C_SQ_PGM_EXPORTS_PS, exports_ps, 0xFFFFFFFF, NULL); - - if (rshader->uses_kill) { - /* only set some bits here, the other bits are set in the dsa state */ - r600_pipe_state_add_reg(rstate, - R_02880C_DB_SHADER_CONTROL, - S_02880C_KILL_ENABLE(1), - S_02880C_KILL_ENABLE(1), NULL); - } - + /* FIXME: Evergreen doesn't seem to support MULTIWRITE_ENABLE. */ + /* only set some bits here, the other bits are set in the dsa state */ + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + db_shader_control, + S_02880C_Z_EXPORT_ENABLE(1) | + S_02880C_STENCIL_EXPORT_ENABLE(1) | + S_02880C_KILL_ENABLE(1), + NULL); r600_pipe_state_add_reg(rstate, R_03A200_SQ_LOOP_CONST_0, 0x01000FFF, 0xFFFFFFFF, NULL); @@ -1651,6 +1497,18 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader 0xFFFFFFFF, NULL); } +void evergreen_fetch_shader(struct r600_vertex_element *ve) +{ + struct r600_pipe_state *rstate = &ve->rstate; + rstate->id = R600_PIPE_STATE_FETCH_SHADER; + rstate->nregs = 0; + r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS, + (r600_bo_offset(ve->fetch_shader)) >> 8, + 0xFFFFFFFF, ve->fetch_shader); +} + void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx) { struct pipe_depth_stencil_alpha_state dsa; @@ -1673,3 +1531,31 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx) S_028000_COPY_CENTROID(1), NULL); return rstate; } + +void evergreen_pipe_set_buffer_resource(struct r600_pipe_context *rctx, + struct r600_pipe_state *rstate, + struct r600_resource *rbuffer, + unsigned offset, unsigned stride) +{ + r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, + offset, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, + rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, + S_030008_STRIDE(stride), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, + S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | + S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | + S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) | + S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, + 0xC0000000, 0xFFFFFFFF, NULL); +} diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index e67254b256..c51a163bd0 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -108,8 +108,9 @@ #define PKT3_IT_OPCODE_S(x) (((x) & 0xFF) << 8) #define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF) #define PKT3_IT_OPCODE_C 0xFFFF00FF +#define PKT3_PREDICATE(x) (((x) >> 0) & 0x1) #define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count)) -#define PKT3(op, count) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count)) +#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PREDICATE(predicate)) /* Registers */ #define R_008C00_SQ_CONFIG 0x00008C00 @@ -327,6 +328,9 @@ #define S_028C70_SOURCE_FORMAT(x) (((x) & 0x3) << 24) #define G_028C70_SOURCE_FORMAT(x) (((x) >> 24) & 0x3) #define C_028C70_SOURCE_FORMAT 0xFCFFFFFF +#define V_028C70_EXPORT_4C_32BPC 0x0 +#define V_028C70_EXPORT_4C_16BPC 0x1 +#define V_028C70_EXPORT_2C_32BPC 0x2 /* Do not use */ #define S_028C70_RAT(x) (((x) & 0x1) << 26) #define G_028C70_RAT(x) (((x) >> 26) & 0x1) #define C_028C70_RAT 0xFBFFFFFF @@ -427,15 +431,6 @@ #define C_028800_STENCILZFAIL_BF 0x1FFFFFFF #define R_028808_CB_COLOR_CONTROL 0x028808 -#define S_028808_FOG_ENABLE(x) (((x) & 0x1) << 0) -#define G_028808_FOG_ENABLE(x) (((x) >> 0) & 0x1) -#define C_028808_FOG_ENABLE 0xFFFFFFFE -#define S_028808_MULTIWRITE_ENABLE(x) (((x) & 0x1) << 1) -#define G_028808_MULTIWRITE_ENABLE(x) (((x) >> 1) & 0x1) -#define C_028808_MULTIWRITE_ENABLE 0xFFFFFFFD -#define S_028808_DITHER_ENABLE(x) (((x) & 0x1) << 2) -#define G_028808_DITHER_ENABLE(x) (((x) >> 2) & 0x1) -#define C_028808_DITHER_ENABLE 0xFFFFFFFB #define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3) #define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1) #define C_028808_DEGAMMA_ENABLE 0xFFFFFFF7 @@ -939,6 +934,9 @@ #define V_030000_SQ_TEX_DIM_2D_ARRAY 0x00000005 #define V_030000_SQ_TEX_DIM_2D_MSAA 0x00000006 #define V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA 0x00000007 +#define S_030000_NON_DISP_TILING_ORDER(x) (((x) & 0x1) << 5) +#define G_030000_NON_DISP_TILING_ORDER(x) (((x) >> 5) & 0x1) +#define C_030000_NON_DISP_TILING_ORDER 0xFFFFFFDF #define S_030000_PITCH(x) (((x) & 0xFFF) << 6) #define G_030000_PITCH(x) (((x) >> 6) & 0xFFF) #define C_030000_PITCH 0xFFFC003F @@ -988,8 +986,8 @@ #define S_030010_SRF_MODE_ALL(x) (((x) & 0x1) << 10) #define G_030010_SRF_MODE_ALL(x) (((x) >> 10) & 0x1) #define C_030010_SRF_MODE_ALL 0xFFFFFBFF -#define V_030010_SFR_MODE_ZERO_CLAMP_MINUS_ONE 0x00000000 -#define V_030010_SFR_MODE_NO_ZERO 0x00000001 +#define V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE 0x00000000 +#define V_030010_SRF_MODE_NO_ZERO 0x00000001 #define S_030010_FORCE_DEGAMMA(x) (((x) & 0x1) << 11) #define G_030010_FORCE_DEGAMMA(x) (((x) >> 11) & 0x1) #define C_030010_FORCE_DEGAMMA 0xFFFFF7FF diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index aa456d493f..0b7d6f7096 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -35,7 +35,7 @@ #define RADEON_CTX_MAX_PM4 (64 * 1024 / 4) #define R600_ERR(fmt, args...) \ - fprintf(stderr, "EE %s/%s:%d - "fmt, __FILE__, __func__, __LINE__, ##args) + fprintf(stderr, "EE %s:%d %s - "fmt, __FILE__, __LINE__, __func__, ##args) typedef uint64_t u64; typedef uint32_t u32; @@ -92,6 +92,9 @@ enum radeon_family { CHIP_CYPRESS, CHIP_HEMLOCK, CHIP_PALM, + CHIP_BARTS, + CHIP_TURKS, + CHIP_CAICOS, CHIP_LAST, }; @@ -110,14 +113,17 @@ struct r600_tiling_info { enum radeon_family r600_get_family(struct radeon *rw); enum chip_class r600_get_family_class(struct radeon *radeon); struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon); +unsigned r600_get_clock_crystal_freq(struct radeon *radeon); +unsigned r600_get_minor_version(struct radeon *radeon); +unsigned r600_get_num_backends(struct radeon *radeon); /* r600_bo.c */ struct r600_bo; struct r600_bo *r600_bo(struct radeon *radeon, - unsigned size, unsigned alignment, - unsigned binding, unsigned usage); + unsigned size, unsigned alignment, + unsigned binding, unsigned usage); struct r600_bo *r600_bo_handle(struct radeon *radeon, - unsigned handle, unsigned *array_mode); + unsigned handle, unsigned *array_mode); void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx); void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo); void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, @@ -245,10 +251,9 @@ struct r600_context { u32 *pm4; struct list_head query_list; unsigned num_query_running; - unsigned fence; struct list_head fenced_bo; - unsigned *cfence; - struct r600_bo *fence_bo; + unsigned max_db; /* for OQ */ + boolean predicate_drawing; }; struct r600_draw { @@ -281,13 +286,11 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query); void r600_query_end(struct r600_context *ctx, struct r600_query *query); void r600_context_queries_suspend(struct r600_context *ctx); void r600_context_queries_resume(struct r600_context *ctx); +void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation, + int flag_wait); int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon); void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw); -void evergreen_ps_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); -void evergreen_vs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); -void evergreen_fs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); - void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 1f41269534..240093f9b9 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -32,52 +32,118 @@ #include "r600_formats.h" #include "r600d.h" -static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu) +#define NUM_OF_CYCLES 3 +#define NUM_OF_COMPONENTS 4 + +static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r600_bc_alu *alu) { if(alu->is_op3) return 3; - switch (alu->inst) { - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: - return 0; - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE: - return 2; - - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: - case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: - return 1; - default: R600_ERR( - "Need instruction operand number for 0x%x.\n", alu->inst); - }; + switch (bc->chiprev) { + case CHIPREV_R600: + case CHIPREV_R700: + switch (alu->inst) { + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: + return 0; + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE: + return 2; + + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: + return 1; + default: R600_ERR( + "Need instruction operand number for 0x%x.\n", alu->inst); + } + break; + case CHIPREV_EVERGREEN: + switch (alu->inst) { + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: + return 0; + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW: + return 2; + + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: + return 1; + default: R600_ERR( + "Need instruction operand number for 0x%x.\n", alu->inst); + } + break; + } return 3; } @@ -104,7 +170,6 @@ static struct r600_bc_alu *r600_bc_alu(void) if (alu == NULL) return NULL; LIST_INITHEAD(&alu->list); - LIST_INITHEAD(&alu->bs_list); return alu; } @@ -155,6 +220,9 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family) case CHIP_CYPRESS: case CHIP_HEMLOCK: case CHIP_PALM: + case CHIP_BARTS: + case CHIP_TURKS: + case CHIP_CAICOS: bc->chiprev = CHIPREV_EVERGREEN; break; default: @@ -184,6 +252,37 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output) { int r; + if (bc->cf_last && (bc->cf_last->inst == output->inst || + (bc->cf_last->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT) && + output->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE))) && + output->type == bc->cf_last->output.type && + output->elem_size == bc->cf_last->output.elem_size && + output->swizzle_x == bc->cf_last->output.swizzle_x && + output->swizzle_y == bc->cf_last->output.swizzle_y && + output->swizzle_z == bc->cf_last->output.swizzle_z && + output->swizzle_w == bc->cf_last->output.swizzle_w && + (output->burst_count + bc->cf_last->output.burst_count) <= 16) { + + if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr && + (output->array_base + output->burst_count) == bc->cf_last->output.array_base) { + + bc->cf_last->output.end_of_program |= output->end_of_program; + bc->cf_last->output.inst = output->inst; + bc->cf_last->output.gpr = output->gpr; + bc->cf_last->output.array_base = output->array_base; + bc->cf_last->output.burst_count += output->burst_count; + return 0; + + } else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) && + output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) { + + bc->cf_last->output.end_of_program |= output->end_of_program; + bc->cf_last->output.inst = output->inst; + bc->cf_last->output.burst_count += output->burst_count; + return 0; + } + } + r = r600_bc_add_cf(bc); if (r) return r; @@ -192,221 +291,849 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output) return 0; } -const unsigned bank_swizzle_vec[8] = {SQ_ALU_VEC_210, //000 - SQ_ALU_VEC_120, //001 - SQ_ALU_VEC_102, //010 +/* alu instructions that can ony exits once per group */ +static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +{ + switch (bc->chiprev) { + case CHIPREV_R600: + case CHIPREV_R700: + return !alu->is_op3 && ( + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT); + case CHIPREV_EVERGREEN: + default: + return !alu->is_op3 && ( + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT); + } +} + +static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +{ + switch (bc->chiprev) { + case CHIPREV_R600: + case CHIPREV_R700: + return !alu->is_op3 && ( + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4); + case CHIPREV_EVERGREEN: + default: + return !alu->is_op3 && ( + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4); + } +} - SQ_ALU_VEC_201, //011 - SQ_ALU_VEC_012, //100 - SQ_ALU_VEC_021, //101 +static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +{ + switch (bc->chiprev) { + case CHIPREV_R600: + case CHIPREV_R700: + return !alu->is_op3 && + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE; + case CHIPREV_EVERGREEN: + default: + return !alu->is_op3 && + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE; + } +} - SQ_ALU_VEC_012, //110 - SQ_ALU_VEC_012}; //111 +static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +{ + switch (bc->chiprev) { + case CHIPREV_R600: + case CHIPREV_R700: + return !alu->is_op3 && ( + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT); + case CHIPREV_EVERGREEN: + default: + return !alu->is_op3 && ( + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT); + } +} -const unsigned bank_swizzle_scl[8] = {SQ_ALU_SCL_210, //000 - SQ_ALU_SCL_122, //001 - SQ_ALU_SCL_122, //010 +/* alu instructions that can only execute on the vector unit */ +static int is_alu_vec_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +{ + return is_alu_reduction_inst(bc, alu) || + is_alu_mova_inst(bc, alu) || + (bc->chiprev == CHIPREV_EVERGREEN && + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR); +} - SQ_ALU_SCL_221, //011 - SQ_ALU_SCL_212, //100 - SQ_ALU_SCL_122, //101 +/* alu instructions that can only execute on the trans unit */ +static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +{ + switch (bc->chiprev) { + case CHIPREV_R600: + case CHIPREV_R700: + if (!alu->is_op3) + return alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN || + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE; + else + return alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT || + alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_D2 || + alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2 || + alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4; + case CHIPREV_EVERGREEN: + default: + if (!alu->is_op3) + /* Note that FLT_TO_INT_* instructions are vector-only instructions + * on Evergreen, despite what the documentation says. FLT_TO_INT + * can do both vector and scalar. */ + return alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN || + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE; + else + return alu->inst == EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT; + } +} - SQ_ALU_SCL_122, //110 - SQ_ALU_SCL_122}; //111 +/* alu instructions that can execute on any unit */ +static int is_alu_any_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +{ + return !is_alu_vec_unit_inst(bc, alu) && + !is_alu_trans_unit_inst(bc, alu); +} -static int init_gpr(struct r600_bc_alu *alu) +static int assign_alu_units(struct r600_bc *bc, struct r600_bc_alu *alu_first, + struct r600_bc_alu *assignment[5]) { - int cycle, component; + struct r600_bc_alu *alu; + unsigned i, chan, trans; + + for (i = 0; i < 5; i++) + assignment[i] = NULL; + + for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) { + chan = alu->dst.chan; + if (is_alu_trans_unit_inst(bc, alu)) + trans = 1; + else if (is_alu_vec_unit_inst(bc, alu)) + trans = 0; + else if (assignment[chan]) + trans = 1; // assume ALU_INST_PREFER_VECTOR + else + trans = 0; + + if (trans) { + if (assignment[4]) { + assert(0); //ALU.Trans has already been allocated + return -1; + } + assignment[4] = alu; + } else { + if (assignment[chan]) { + assert(0); //ALU.chan has already been allocated + return -1; + } + assignment[chan] = alu; + } + + if (alu->last) + break; + } + return 0; +} + +struct alu_bank_swizzle { + int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS]; + int hw_cfile_addr[4]; + int hw_cfile_elem[4]; +}; + +static const unsigned cycle_for_bank_swizzle_vec[][3] = { + [SQ_ALU_VEC_012] = { 0, 1, 2 }, + [SQ_ALU_VEC_021] = { 0, 2, 1 }, + [SQ_ALU_VEC_120] = { 1, 2, 0 }, + [SQ_ALU_VEC_102] = { 1, 0, 2 }, + [SQ_ALU_VEC_201] = { 2, 0, 1 }, + [SQ_ALU_VEC_210] = { 2, 1, 0 } +}; + +static const unsigned cycle_for_bank_swizzle_scl[][3] = { + [SQ_ALU_SCL_210] = { 2, 1, 0 }, + [SQ_ALU_SCL_122] = { 1, 2, 2 }, + [SQ_ALU_SCL_212] = { 2, 1, 2 }, + [SQ_ALU_SCL_221] = { 2, 2, 1 } +}; + +static void init_bank_swizzle(struct alu_bank_swizzle *bs) +{ + int i, cycle, component; /* set up gpr use */ for (cycle = 0; cycle < NUM_OF_CYCLES; cycle++) for (component = 0; component < NUM_OF_COMPONENTS; component++) - alu->hw_gpr[cycle][component] = -1; - return 0; + bs->hw_gpr[cycle][component] = -1; + for (i = 0; i < 4; i++) + bs->hw_cfile_addr[i] = -1; + for (i = 0; i < 4; i++) + bs->hw_cfile_elem[i] = -1; } -#if 0 -static int reserve_gpr(struct r600_bc_alu *alu, unsigned sel, unsigned chan, unsigned cycle) +static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, unsigned cycle) { - if (alu->hw_gpr[cycle][chan] < 0) - alu->hw_gpr[cycle][chan] = sel; - else if (alu->hw_gpr[cycle][chan] != (int)sel) { - R600_ERR("Another scalar operation has already used GPR read port for channel\n"); + if (bs->hw_gpr[cycle][chan] == -1) + bs->hw_gpr[cycle][chan] = sel; + else if (bs->hw_gpr[cycle][chan] != (int)sel) { + // Another scalar operation has already used GPR read port for channel return -1; } return 0; } -static int cycle_for_scalar_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle) +static int reserve_cfile(struct r600_bc *bc, struct alu_bank_swizzle *bs, unsigned sel, unsigned chan) { - int table[3]; - int ret = 0; - switch (swiz) { - case SQ_ALU_SCL_210: - table[0] = 2; table[1] = 1; table[2] = 0; - *p_cycle = table[sel]; - break; - case SQ_ALU_SCL_122: - table[0] = 1; table[1] = 2; table[2] = 2; - *p_cycle = table[sel]; - break; - case SQ_ALU_SCL_212: - table[0] = 2; table[1] = 1; table[2] = 2; - *p_cycle = table[sel]; - break; - case SQ_ALU_SCL_221: - table[0] = 2; table[1] = 2; table[2] = 1; - *p_cycle = table[sel]; - break; - break; - default: - R600_ERR("bad scalar bank swizzle value\n"); - ret = -1; - break; + int res, num_res = 4; + if (bc->chiprev >= CHIPREV_R700) { + num_res = 2; + chan /= 2; + } + for (res = 0; res < num_res; ++res) { + if (bs->hw_cfile_addr[res] == -1) { + bs->hw_cfile_addr[res] = sel; + bs->hw_cfile_elem[res] = chan; + return 0; + } else if (bs->hw_cfile_addr[res] == sel && + bs->hw_cfile_elem[res] == chan) + return 0; // Read for this scalar element already reserved, nothing to do here. } - return ret; + // All cfile read ports are used, cannot reference vector element + return -1; } -static int cycle_for_vector_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle) +static int is_gpr(unsigned sel) { - int table[3]; - int ret; - - switch (swiz) { - case SQ_ALU_VEC_012: - table[0] = 0; table[1] = 1; table[2] = 2; - *p_cycle = table[sel]; - break; - case SQ_ALU_VEC_021: - table[0] = 0; table[1] = 2; table[2] = 1; - *p_cycle = table[sel]; - break; - case SQ_ALU_VEC_120: - table[0] = 1; table[1] = 2; table[2] = 0; - *p_cycle = table[sel]; - break; - case SQ_ALU_VEC_102: - table[0] = 1; table[1] = 0; table[2] = 2; - *p_cycle = table[sel]; - break; - case SQ_ALU_VEC_201: - table[0] = 2; table[1] = 0; table[2] = 1; - *p_cycle = table[sel]; - break; - case SQ_ALU_VEC_210: - table[0] = 2; table[1] = 1; table[2] = 0; - *p_cycle = table[sel]; - break; - default: - R600_ERR("bad vector bank swizzle value\n"); - ret = -1; - break; - } - return ret; + return (sel >= 0 && sel <= 127); } +/* CB constants start at 512, and get translated to a kcache index when ALU + * clauses are constructed. Note that we handle kcache constants the same way + * as (the now gone) cfile constants, is that really required? */ +static int is_cfile(unsigned sel) +{ + return (sel > 255 && sel < 512) || + (sel > 511 && sel < 4607) || // Kcache before translate + (sel > 127 && sel < 192); // Kcache after translate +} +static int is_const(int sel) +{ + return is_cfile(sel) || + (sel >= V_SQ_ALU_SRC_0 && + sel <= V_SQ_ALU_SRC_LITERAL); +} -static void update_chan_counter(struct r600_bc_alu *alu, int *chan_counter) +static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu, + struct alu_bank_swizzle *bs, int bank_swizzle) { - int num_src; - int i; - int channel_swizzle; + int r, src, num_src, sel, elem, cycle; + + num_src = r600_bc_get_num_operands(bc, alu); + for (src = 0; src < num_src; src++) { + sel = alu->src[src].sel; + elem = alu->src[src].chan; + if (is_gpr(sel)) { + cycle = cycle_for_bank_swizzle_vec[bank_swizzle][src]; + if (src == 1 && sel == alu->src[0].sel && elem == alu->src[0].chan) + // Nothing to do; special-case optimization, + // second source uses first source’s reservation + continue; + else { + r = reserve_gpr(bs, sel, elem, cycle); + if (r) + return r; + } + } else if (is_cfile(sel)) { + r = reserve_cfile(bc, bs, sel, elem); + if (r) + return r; + } + // No restrictions on PV, PS, literal or special constants + } + return 0; +} - num_src = r600_bc_get_num_operands(alu); +static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu, + struct alu_bank_swizzle *bs, int bank_swizzle) +{ + int r, src, num_src, const_count, sel, elem, cycle; + + num_src = r600_bc_get_num_operands(bc, alu); + for (const_count = 0, src = 0; src < num_src; ++src) { + sel = alu->src[src].sel; + elem = alu->src[src].chan; + if (is_const(sel)) { // Any constant, including literal and inline constants + if (const_count >= 2) + // More than two references to a constant in + // transcendental operation. + return -1; + else + const_count++; + } + if (is_cfile(sel)) { + r = reserve_cfile(bc, bs, sel, elem); + if (r) + return r; + } + } + for (src = 0; src < num_src; ++src) { + sel = alu->src[src].sel; + elem = alu->src[src].chan; + if (is_gpr(sel)) { + cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src]; + if (cycle < const_count) + // Cycle for GPR load conflicts with + // constant load in transcendental operation. + return -1; + r = reserve_gpr(bs, sel, elem, cycle); + if (r) + return r; + } + // Constants already processed + // No restrictions on PV, PS + } + return 0; +} - for (i = 0; i < num_src; i++) { - channel_swizzle = alu->src[i].chan; - if ((alu->src[i].sel > 0 && alu->src[i].sel < 128) && channel_swizzle <= 3) - chan_counter[channel_swizzle]++; +static int check_and_set_bank_swizzle(struct r600_bc *bc, + struct r600_bc_alu *slots[5]) +{ + struct alu_bank_swizzle bs; + int bank_swizzle[5]; + int i, r = 0, forced = 0; + + for (i = 0; i < 5; i++) + if (slots[i] && slots[i]->bank_swizzle_force) { + slots[i]->bank_swizzle = slots[i]->bank_swizzle_force; + forced = 1; + } + + if (forced) + return 0; + + // just check every possible combination of bank swizzle + // not very efficent, but works on the first try in most of the cases + for (i = 0; i < 4; i++) + bank_swizzle[i] = SQ_ALU_VEC_012; + bank_swizzle[4] = SQ_ALU_SCL_210; + while(bank_swizzle[4] <= SQ_ALU_SCL_221) { + init_bank_swizzle(&bs); + for (i = 0; i < 4; i++) { + if (slots[i]) { + r = check_vector(bc, slots[i], &bs, bank_swizzle[i]); + if (r) + break; + } + } + if (!r && slots[4]) { + r = check_scalar(bc, slots[4], &bs, bank_swizzle[4]); + } + if (!r) { + for (i = 0; i < 5; i++) { + if (slots[i]) + slots[i]->bank_swizzle = bank_swizzle[i]; + } + return 0; + } + + for (i = 0; i < 5; i++) { + bank_swizzle[i]++; + if (bank_swizzle[i] <= SQ_ALU_VEC_210) + break; + else + bank_swizzle[i] = SQ_ALU_VEC_012; + } } + + // couldn't find a working swizzle + return -1; } -/* we need something like this I think - but this is bogus */ -int check_read_slots(struct r600_bc *bc, struct r600_bc_alu *alu_first) +static int replace_gpr_with_pv_ps(struct r600_bc *bc, + struct r600_bc_alu *slots[5], struct r600_bc_alu *alu_prev) { - struct r600_bc_alu *alu; - int chan_counter[4] = { 0 }; + struct r600_bc_alu *prev[5]; + int gpr[5], chan[5]; + int i, j, r, src, num_src; - update_chan_counter(alu_first, chan_counter); + r = assign_alu_units(bc, alu_prev, prev); + if (r) + return r; - LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) { - update_chan_counter(alu, chan_counter); + for (i = 0; i < 5; ++i) { + if(prev[i] && prev[i]->dst.write && !prev[i]->dst.rel) { + gpr[i] = prev[i]->dst.sel; + /* cube writes more than PV.X */ + if (!is_alu_cube_inst(bc, prev[i]) && is_alu_reduction_inst(bc, prev[i])) + chan[i] = 0; + else + chan[i] = prev[i]->dst.chan; + } else + gpr[i] = -1; } - if (chan_counter[0] > 3 || - chan_counter[1] > 3 || - chan_counter[2] > 3 || - chan_counter[3] > 3) { - R600_ERR("needed to split instruction for input ran out of banks %x %d %d %d %d\n", - alu_first->inst, chan_counter[0], chan_counter[1], chan_counter[2], chan_counter[3]); - return -1; + for (i = 0; i < 5; ++i) { + struct r600_bc_alu *alu = slots[i]; + if(!alu) + continue; + + num_src = r600_bc_get_num_operands(bc, alu); + for (src = 0; src < num_src; ++src) { + if (!is_gpr(alu->src[src].sel) || alu->src[src].rel) + continue; + + if (alu->src[src].sel == gpr[4] && + alu->src[src].chan == chan[4]) { + alu->src[src].sel = V_SQ_ALU_SRC_PS; + alu->src[src].chan = 0; + continue; + } + + for (j = 0; j < 4; ++j) { + if (alu->src[src].sel == gpr[j] && + alu->src[src].chan == j) { + alu->src[src].sel = V_SQ_ALU_SRC_PV; + alu->src[src].chan = chan[j]; + break; + } + } + } } + return 0; } -#endif -static int is_const(int sel) +void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg) { - if (sel > 255 && sel < 512) - return 1; - if (sel >= V_SQ_ALU_SRC_0 && sel <= V_SQ_ALU_SRC_LITERAL) - return 1; - return 0; + switch(value) { + case 0: + *sel = V_SQ_ALU_SRC_0; + break; + case 1: + *sel = V_SQ_ALU_SRC_1_INT; + break; + case -1: + *sel = V_SQ_ALU_SRC_M_1_INT; + break; + case 0x3F800000: // 1.0f + *sel = V_SQ_ALU_SRC_1; + break; + case 0x3F000000: // 0.5f + *sel = V_SQ_ALU_SRC_0_5; + break; + case 0xBF800000: // -1.0f + *sel = V_SQ_ALU_SRC_1; + *neg ^= 1; + break; + case 0xBF000000: // -0.5f + *sel = V_SQ_ALU_SRC_0_5; + *neg ^= 1; + break; + default: + *sel = V_SQ_ALU_SRC_LITERAL; + break; + } } -static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu) +/* compute how many literal are needed */ +static int r600_bc_alu_nliterals(struct r600_bc *bc, struct r600_bc_alu *alu, + uint32_t literal[4], unsigned *nliteral) { - unsigned swizzle_key; - - if (alu->bank_swizzle_force) { - alu->bank_swizzle = alu->bank_swizzle_force; - return 0; + unsigned num_src = r600_bc_get_num_operands(bc, alu); + unsigned i, j; + + for (i = 0; i < num_src; ++i) { + if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) { + uint32_t value = alu->src[i].value; + unsigned found = 0; + for (j = 0; j < *nliteral; ++j) { + if (literal[j] == value) { + found = 1; + break; + } + } + if (!found) { + if (*nliteral >= 4) + return -EINVAL; + literal[(*nliteral)++] = value; + } + } } - swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) + - (is_const(alu->src[1].sel) ? 2 : 0 ) + - (is_const(alu->src[2].sel) ? 1 : 0 ); - - alu->bank_swizzle = bank_swizzle_scl[swizzle_key]; return 0; } -static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu) +static void r600_bc_alu_adjust_literals(struct r600_bc *bc, + struct r600_bc_alu *alu, + uint32_t literal[4], unsigned nliteral) +{ + unsigned num_src = r600_bc_get_num_operands(bc, alu); + unsigned i, j; + + for (i = 0; i < num_src; ++i) { + if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) { + uint32_t value = alu->src[i].value; + for (j = 0; j < nliteral; ++j) { + if (literal[j] == value) { + alu->src[i].chan = j; + break; + } + } + } + } +} + +static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], + struct r600_bc_alu *alu_prev) { - unsigned swizzle_key; + struct r600_bc_alu *prev[5]; + struct r600_bc_alu *result[5] = { NULL }; + + uint32_t literal[4], prev_literal[4]; + unsigned nliteral = 0, prev_nliteral = 0; - if (alu->bank_swizzle_force) { - alu->bank_swizzle = alu->bank_swizzle_force; + int i, j, r, src, num_src; + int num_once_inst = 0; + int have_mova = 0, have_rel = 0; + + r = assign_alu_units(bc, alu_prev, prev); + if (r) + return r; + + for (i = 0; i < 5; ++i) { + struct r600_bc_alu *alu; + + /* check number of literals */ + if (prev[i]) { + if (r600_bc_alu_nliterals(bc, prev[i], literal, &nliteral)) + return 0; + if (r600_bc_alu_nliterals(bc, prev[i], prev_literal, &prev_nliteral)) + return 0; + if (is_alu_mova_inst(bc, prev[i])) { + if (have_rel) + return 0; + have_mova = 1; + } + num_once_inst += is_alu_once_inst(bc, prev[i]); + } + if (slots[i] && r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral)) + return 0; + + // let's check used slots + if (prev[i] && !slots[i]) { + result[i] = prev[i]; + continue; + } else if (prev[i] && slots[i]) { + if (result[4] == NULL && prev[4] == NULL && slots[4] == NULL) { + // trans unit is still free try to use it + if (is_alu_any_unit_inst(bc, slots[i])) { + result[i] = prev[i]; + result[4] = slots[i]; + } else if (is_alu_any_unit_inst(bc, prev[i])) { + result[i] = slots[i]; + result[4] = prev[i]; + } else + return 0; + } else + return 0; + } else if(!slots[i]) { + continue; + } else + result[i] = slots[i]; + + // let's check source gprs + alu = slots[i]; + num_once_inst += is_alu_once_inst(bc, alu); + + num_src = r600_bc_get_num_operands(bc, alu); + for (src = 0; src < num_src; ++src) { + if (alu->src[src].rel) { + if (have_mova) + return 0; + have_rel = 1; + } + + // constants doesn't matter + if (!is_gpr(alu->src[src].sel)) + continue; + + for (j = 0; j < 5; ++j) { + if (!prev[j] || !prev[j]->dst.write) + continue; + + // if it's relative then we can't determin which gpr is really used + if (prev[j]->dst.chan == alu->src[src].chan && + (prev[j]->dst.sel == alu->src[src].sel || + prev[j]->dst.rel || alu->src[src].rel)) + return 0; + } + } + } + + /* more than one PRED_ or KILL_ ? */ + if (num_once_inst > 1) return 0; + + /* check if the result can still be swizzlet */ + r = check_and_set_bank_swizzle(bc, result); + if (r) + return 0; + + /* looks like everything worked out right, apply the changes */ + + /* undo adding previus literals */ + bc->cf_last->ndw -= align(prev_nliteral, 2); + + /* sort instructions */ + for (i = 0; i < 5; ++i) { + slots[i] = result[i]; + if (result[i]) { + LIST_DEL(&result[i]->list); + result[i]->last = 0; + LIST_ADDTAIL(&result[i]->list, &bc->cf_last->alu); + } + } + + /* determine new last instruction */ + LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list)->last = 1; + + /* determine new first instruction */ + for (i = 0; i < 5; ++i) { + if (result[i]) { + bc->cf_last->curr_bs_head = result[i]; + break; + } } - swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) + - (is_const(alu->src[1].sel) ? 2 : 0 ) + - (is_const(alu->src[2].sel) ? 1 : 0 ); - alu->bank_swizzle = bank_swizzle_vec[swizzle_key]; + bc->cf_last->prev_bs_head = bc->cf_last->prev2_bs_head; + bc->cf_last->prev2_bs_head = NULL; + return 0; } -static int check_and_set_bank_swizzle(struct r600_bc *bc, struct r600_bc_alu *alu_first) +/* This code handles kcache lines as single blocks of 32 constants. We could + * probably do slightly better by recognizing that we actually have two + * consecutive lines of 16 constants, but the resulting code would also be + * somewhat more complicated. */ +static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *alu, int type) { - struct r600_bc_alu *alu = NULL; - int num_instr = 1; + struct r600_bc_kcache *kcache = bc->cf_last->kcache; + unsigned int required_lines; + unsigned int free_lines = 0; + unsigned int cache_line[3]; + unsigned int count = 0; + unsigned int i, j; + int r; + + /* Collect required cache lines. */ + for (i = 0; i < 3; ++i) { + bool found = false; + unsigned int line; + + if (alu->src[i].sel < 512) + continue; - init_gpr(alu_first); + line = ((alu->src[i].sel - 512) / 32) * 2; - LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) { - num_instr++; + for (j = 0; j < count; ++j) { + if (cache_line[j] == line) { + found = true; + break; + } + } + + if (!found) + cache_line[count++] = line; } - if (num_instr == 1) { - check_scalar(bc, alu_first); - - } else { -/* check_read_slots(bc, bc->cf_last->curr_bs_head);*/ - check_vector(bc, alu_first); - LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) { - check_vector(bc, alu); + /* This should never actually happen. */ + if (count >= 3) return -ENOMEM; + + for (i = 0; i < 2; ++i) { + if (kcache[i].mode == V_SQ_CF_KCACHE_NOP) { + ++free_lines; + } + } + + /* Filter lines pulled in by previous intructions. Note that this is + * only for the required_lines count, we can't remove these from the + * cache_line array since we may have to start a new ALU clause. */ + for (i = 0, required_lines = count; i < count; ++i) { + for (j = 0; j < 2; ++j) { + if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 && + kcache[j].addr == cache_line[i]) { + --required_lines; + break; + } + } + } + + /* Start a new ALU clause if needed. */ + if (required_lines > free_lines) { + if ((r = r600_bc_add_cf(bc))) { + return r; + } + bc->cf_last->inst = (type << 3); + kcache = bc->cf_last->kcache; + } + + /* Setup the kcache lines. */ + for (i = 0; i < count; ++i) { + bool found = false; + + for (j = 0; j < 2; ++j) { + if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 && + kcache[j].addr == cache_line[i]) { + found = true; + break; + } + } + + if (found) continue; + + for (j = 0; j < 2; ++j) { + if (kcache[j].mode == V_SQ_CF_KCACHE_NOP) { + kcache[j].bank = 0; + kcache[j].addr = cache_line[i]; + kcache[j].mode = V_SQ_CF_KCACHE_LOCK_2; + break; + } + } + } + + /* Alter the src operands to refer to the kcache. */ + for (i = 0; i < 3; ++i) { + static const unsigned int base[] = {128, 160, 256, 288}; + unsigned int line; + + if (alu->src[i].sel < 512) + continue; + + alu->src[i].sel -= 512; + line = (alu->src[i].sel / 32) * 2; + + for (j = 0; j < 2; ++j) { + if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 && + kcache[j].addr == line) { + alu->src[i].sel &= 0x1f; + alu->src[i].sel += base[j]; + break; + } } } + return 0; } @@ -419,62 +1146,100 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int if (nalu == NULL) return -ENOMEM; memcpy(nalu, alu, sizeof(struct r600_bc_alu)); - nalu->nliteral = 0; + + if (bc->cf_last != NULL && bc->cf_last->inst != (type << 3)) { + /* check if we could add it anyway */ + if (bc->cf_last->inst == (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) && + type == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE) { + LIST_FOR_EACH_ENTRY(lalu, &bc->cf_last->alu, list) { + if (lalu->predicate) { + bc->force_add_cf = 1; + break; + } + } + } else + bc->force_add_cf = 1; + } /* cf can contains only alu or only vtx or only tex */ - if (bc->cf_last == NULL || bc->cf_last->inst != (type << 3) || - bc->force_add_cf) { + if (bc->cf_last == NULL || bc->force_add_cf) { r = r600_bc_add_cf(bc); if (r) { free(nalu); return r; } - bc->cf_last->inst = (type << 3); } + bc->cf_last->inst = (type << 3); + + /* Setup the kcache for this ALU instruction. This will start a new + * ALU clause if needed. */ + if ((r = r600_bc_alloc_kcache_lines(bc, nalu, type))) { + free(nalu); + return r; + } + if (!bc->cf_last->curr_bs_head) { bc->cf_last->curr_bs_head = nalu; - LIST_INITHEAD(&nalu->bs_list); - } else { - LIST_ADDTAIL(&nalu->bs_list, &bc->cf_last->curr_bs_head->bs_list); - } - /* at most 128 slots, one add alu can add 4 slots + 4 constants(2 slots) - * worst case */ - if (alu->last && (bc->cf_last->ndw >> 1) >= 120) { - bc->force_add_cf = 1; } /* number of gpr == the last gpr used in any alu */ for (i = 0; i < 3; i++) { - if (alu->src[i].sel >= bc->ngpr && alu->src[i].sel < 128) { - bc->ngpr = alu->src[i].sel + 1; - } - /* compute how many literal are needed - * either 2 or 4 literals - */ - if (alu->src[i].sel == 253) { - if (((alu->src[i].chan + 2) & 0x6) > nalu->nliteral) { - nalu->nliteral = (alu->src[i].chan + 2) & 0x6; - } + if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) { + bc->ngpr = nalu->src[i].sel + 1; } + if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL) + r600_bc_special_constants(nalu->src[i].value, + &nalu->src[i].sel, &nalu->src[i].neg); } - if (!LIST_IS_EMPTY(&bc->cf_last->alu)) { - lalu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list); - if (!lalu->last && lalu->nliteral > nalu->nliteral) { - nalu->nliteral = lalu->nliteral; - } - } - if (alu->dst.sel >= bc->ngpr) { - bc->ngpr = alu->dst.sel + 1; + if (nalu->dst.sel >= bc->ngpr) { + bc->ngpr = nalu->dst.sel + 1; } LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu); /* each alu use 2 dwords */ bc->cf_last->ndw += 2; bc->ndw += 2; - bc->cf_last->kcache0_mode = 2; - /* process cur ALU instructions for bank swizzle */ - if (alu->last) { - check_and_set_bank_swizzle(bc, bc->cf_last->curr_bs_head); + if (nalu->last) { + uint32_t literal[4]; + unsigned nliteral; + struct r600_bc_alu *slots[5]; + r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots); + if (r) + return r; + + if (bc->cf_last->prev_bs_head) { + r = merge_inst_groups(bc, slots, bc->cf_last->prev_bs_head); + if (r) + return r; + } + + if (bc->cf_last->prev_bs_head) { + r = replace_gpr_with_pv_ps(bc, slots, bc->cf_last->prev_bs_head); + if (r) + return r; + } + + r = check_and_set_bank_swizzle(bc, slots); + if (r) + return r; + + for (i = 0, nliteral = 0; i < 5; i++) { + if (slots[i]) { + r = r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral); + if (r) + return r; + } + } + bc->cf_last->ndw += align(nliteral, 2); + + /* at most 128 slots, one add alu can add 5 slots + 4 constants(2 slots) + * worst case */ + if ((bc->cf_last->ndw >> 1) >= 120) { + bc->force_add_cf = 1; + } + + bc->cf_last->prev2_bs_head = bc->cf_last->prev_bs_head; + bc->cf_last->prev_bs_head = bc->cf_last->curr_bs_head; bc->cf_last->curr_bs_head = NULL; } return 0; @@ -485,42 +1250,22 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) return r600_bc_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); } -int r600_bc_add_literal(struct r600_bc *bc, const u32 *value) +static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc) { - struct r600_bc_alu *alu; + switch (bc->chiprev) { + case CHIPREV_R600: + return 8; - if (bc->cf_last == NULL) { - return 0; - } - if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) { - return 0; - } - /* all same on EG */ - if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_JUMP || - bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_ELSE || - bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL || - bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK || - bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE || - bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END || - bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) { - return 0; - } - /* same on EG */ - if (((bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) && - (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3))) || - LIST_IS_EMPTY(&bc->cf_last->alu)) { - R600_ERR("last CF is not ALU (%p)\n", bc->cf_last); - return -EINVAL; - } - alu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list); - if (!alu->last || !alu->nliteral || alu->literal_added) { - return 0; + case CHIPREV_R700: + return 16; + + case CHIPREV_EVERGREEN: + return 64; + + default: + R600_ERR("Unknown chiprev %d.\n", bc->chiprev); + return 8; } - memcpy(alu->value, value, 4 * 4); - bc->cf_last->ndw += alu->nliteral; - bc->ndw += alu->nliteral; - alu->literal_added = 1; - return 0; } int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) @@ -548,7 +1293,7 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) /* each fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; - if ((bc->ndw / 4) > 7) + if ((bc->cf_last->ndw / 4) >= r600_bc_num_tex_and_vtx_instructions(bc)) bc->force_add_cf = 1; return 0; } @@ -562,6 +1307,18 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) return -ENOMEM; memcpy(ntex, tex, sizeof(struct r600_bc_tex)); + /* we can't fetch data und use it as texture lookup address in the same TEX clause */ + if (bc->cf_last != NULL && + bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) { + struct r600_bc_tex *ttex; + LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) { + if (ttex->dst_gpr == ntex->src_gpr) { + bc->force_add_cf = 1; + break; + } + } + } + /* cf can contains only alu or only vtx or only tex */ if (bc->cf_last == NULL || bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX || @@ -573,11 +1330,17 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) } bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_TEX; } + if (ntex->src_gpr >= bc->ngpr) { + bc->ngpr = ntex->src_gpr + 1; + } + if (ntex->dst_gpr >= bc->ngpr) { + bc->ngpr = ntex->dst_gpr + 1; + } LIST_ADDTAIL(&ntex->list, &bc->cf_last->tex); /* each texture fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; - if ((bc->ndw / 4) > 7) + if ((bc->cf_last->ndw / 4) >= r600_bc_num_tex_and_vtx_instructions(bc)) bc->force_add_cf = 1; return 0; } @@ -597,31 +1360,8 @@ int r600_bc_add_cfinst(struct r600_bc *bc, int inst) /* common to all 3 families */ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id) { - unsigned fetch_resource_start = 0; - - /* check if we are fetch shader */ - /* fetch shader can also access vertex resource, - * first fetch shader resource is at 160 - */ - if (bc->type == -1) { - switch (bc->chiprev) { - /* r600 */ - case CHIPREV_R600: - /* r700 */ - case CHIPREV_R700: - fetch_resource_start = 160; - break; - /* evergreen */ - case CHIPREV_EVERGREEN: - fetch_resource_start = 0; - break; - default: - fprintf(stderr, "%s:%s:%d unknown chiprev %d\n", - __FILE__, __func__, __LINE__, bc->chiprev); - break; - } - } - bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id + fetch_resource_start) | + bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | + S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) | S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x) | S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count); @@ -635,7 +1375,8 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) | S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) | S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr); - bc->bytecode[id++] = S_SQ_VTX_WORD2_MEGA_FETCH(1); + bc->bytecode[id++] = S_SQ_VTX_WORD2_OFFSET(vtx->offset) | + S_SQ_VTX_WORD2_MEGA_FETCH(1); bc->bytecode[id++] = 0; return 0; } @@ -673,8 +1414,6 @@ static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsign /* r600 only, r700/eg bits in r700_asm.c */ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) { - unsigned i; - /* don't replace gpr by pv or ps for destination register */ bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) | @@ -705,22 +1444,23 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | + S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) | S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) | S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) | S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) | S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate); } - if (alu->last) { - if (alu->nliteral && !alu->literal_added) { - R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst); - } - for (i = 0; i < alu->nliteral; i++) { - bc->bytecode[id++] = alu->value[i]; - } - } return 0; } +static void r600_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf) +{ + *bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); + *bytecode++ = S_SQ_CF_WORD1_CF_INST(cf->inst) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1); +} + /* common for r600/r700 - eg in eg_asm.c */ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) { @@ -729,15 +1469,17 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) switch (cf->inst) { case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3): bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) | - S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) | - S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) | - S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache1_bank); + S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) | + S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) | + S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank); bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) | - S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) | - S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) | - S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) | + S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) | + S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) | + S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) | S_SQ_CF_ALU_WORD1_BARRIER(1) | S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == CHIPREV_R600 ? cf->r6xx_uses_waterfall : 0) | S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); @@ -745,10 +1487,10 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) case V_SQ_CF_WORD1_SQ_CF_INST_TEX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: - bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); - bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1); + if (bc->chiprev == CHIPREV_R700) + r700_bc_cf_vtx_build(&bc->bytecode[id], cf); + else + r600_bc_cf_vtx_build(&bc->bytecode[id], cf); break; case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: @@ -756,7 +1498,8 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) | S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) | S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type); - bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) | + bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) | + S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) | @@ -793,8 +1536,10 @@ int r600_bc_build(struct r600_bc *bc) struct r600_bc_alu *alu; struct r600_bc_vtx *vtx; struct r600_bc_tex *tex; + uint32_t literal[4]; + unsigned nliteral; unsigned addr; - int r; + int i, r; if (bc->callstack[0].max > 0) bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2; @@ -808,6 +1553,8 @@ int r600_bc_build(struct r600_bc *bc) LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { switch (cf->inst) { case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3): case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): break; case V_SQ_CF_WORD1_SQ_CF_INST_TEX: @@ -854,8 +1601,16 @@ int r600_bc_build(struct r600_bc *bc) return r; switch (cf->inst) { case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3): case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): + nliteral = 0; + memset(literal, 0, sizeof(literal)); LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { + r = r600_bc_alu_nliterals(bc, alu, literal, &nliteral); + if (r) + return r; + r600_bc_alu_adjust_literals(bc, alu, literal, nliteral); switch(bc->chiprev) { case CHIPREV_R600: r = r600_bc_alu_build(bc, alu, addr); @@ -872,7 +1627,11 @@ int r600_bc_build(struct r600_bc *bc) return r; addr += 2; if (alu->last) { - addr += alu->nliteral; + for (i = 0; i < align(nliteral, 2); ++i) { + bc->bytecode[addr++] = literal[i]; + } + nliteral = 0; + memset(literal, 0, sizeof(literal)); } } break; @@ -953,7 +1712,14 @@ void r600_bc_clear(struct r600_bc *bc) void r600_bc_dump(struct r600_bc *bc) { - unsigned i; + struct r600_bc_cf *cf = NULL; + struct r600_bc_alu *alu = NULL; + struct r600_bc_vtx *vtx = NULL; + struct r600_bc_tex *tex = NULL; + + unsigned i, id; + uint32_t literal[4]; + unsigned nliteral; char chip = '6'; switch (bc->chiprev) { @@ -968,84 +1734,191 @@ void r600_bc_dump(struct r600_bc *bc) chip = '6'; break; } - fprintf(stderr, "bytecode %d dw -----------------------\n", bc->ndw); + fprintf(stderr, "bytecode %d dw -- %d gprs ---------------------\n", bc->ndw, bc->ngpr); fprintf(stderr, " %c\n", chip); - for (i = 0; i < bc->ndw; i++) { - fprintf(stderr, "0x%08X\n", bc->bytecode[i]); - } - fprintf(stderr, "--------------------------------------\n"); -} -void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) -{ - struct r600_pipe_state *rstate; - unsigned i = 0; - - if (count > 8) { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(8 - 1); - bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(count - 8 - 1); - } else { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(count - 1); - } - bytecode[i++] = S_SQ_CF_WORD0_ADDR(0); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) | - S_SQ_CF_WORD1_BARRIER(1); - - rstate = &ve->rstate; - rstate->id = R600_PIPE_STATE_FETCH_SHADER; - rstate->nregs = 0; - r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS, - r600_bo_offset(ve->fetch_shader) >> 8, - 0xFFFFFFFF, ve->fetch_shader); -} + LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { + id = cf->id; -void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) -{ - struct r600_pipe_state *rstate; - unsigned i = 0; - - if (count > 8) { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(8 - 1); - bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT((count - 8) - 1); - } else { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(count - 1); + switch (cf->inst) { + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): + fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]); + fprintf(stderr, "ADDR:%d ", cf->addr); + fprintf(stderr, "KCACHE_MODE0:%X ", cf->kcache[0].mode); + fprintf(stderr, "KCACHE_BANK0:%X ", cf->kcache[0].bank); + fprintf(stderr, "KCACHE_BANK1:%X\n", cf->kcache[1].bank); + id++; + fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]); + fprintf(stderr, "INST:%d ", cf->inst); + fprintf(stderr, "KCACHE_MODE1:%X ", cf->kcache[1].mode); + fprintf(stderr, "KCACHE_ADDR0:%X ", cf->kcache[0].addr); + fprintf(stderr, "KCACHE_ADDR1:%X ", cf->kcache[1].addr); + fprintf(stderr, "COUNT:%d\n", cf->ndw / 2); + break; + case V_SQ_CF_WORD1_SQ_CF_INST_TEX: + case V_SQ_CF_WORD1_SQ_CF_INST_VTX: + case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: + fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]); + fprintf(stderr, "ADDR:%d\n", cf->addr); + id++; + fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]); + fprintf(stderr, "INST:%d ", cf->inst); + fprintf(stderr, "COUNT:%d\n", cf->ndw / 4); + break; + case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: + case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: + fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]); + fprintf(stderr, "GPR:%X ", cf->output.gpr); + fprintf(stderr, "ELEM_SIZE:%X ", cf->output.elem_size); + fprintf(stderr, "ARRAY_BASE:%X ", cf->output.array_base); + fprintf(stderr, "TYPE:%X\n", cf->output.type); + id++; + fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]); + fprintf(stderr, "SWIZ_X:%X ", cf->output.swizzle_x); + fprintf(stderr, "SWIZ_Y:%X ", cf->output.swizzle_y); + fprintf(stderr, "SWIZ_Z:%X ", cf->output.swizzle_z); + fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w); + fprintf(stderr, "BARRIER:%X ", cf->output.barrier); + fprintf(stderr, "INST:%d ", cf->output.inst); + fprintf(stderr, "BURST_COUNT:%d ", cf->output.burst_count); + fprintf(stderr, "EOP:%X\n", cf->output.end_of_program); + break; + case V_SQ_CF_WORD1_SQ_CF_INST_JUMP: + case V_SQ_CF_WORD1_SQ_CF_INST_ELSE: + case V_SQ_CF_WORD1_SQ_CF_INST_POP: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: + case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: + case V_SQ_CF_WORD1_SQ_CF_INST_RETURN: + fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]); + fprintf(stderr, "ADDR:%d\n", cf->cf_addr); + id++; + fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]); + fprintf(stderr, "INST:%d ", cf->inst); + fprintf(stderr, "COND:%X ", cf->cond); + fprintf(stderr, "POP_COUNT:%X\n", cf->pop_count); + break; + } + + id = cf->addr; + nliteral = 0; + LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { + r600_bc_alu_nliterals(bc, alu, literal, &nliteral); + + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "SRC0(SEL:%d ", alu->src[0].sel); + fprintf(stderr, "REL:%d ", alu->src[0].rel); + fprintf(stderr, "CHAN:%d ", alu->src[0].chan); + fprintf(stderr, "NEG:%d) ", alu->src[0].neg); + fprintf(stderr, "SRC1(SEL:%d ", alu->src[1].sel); + fprintf(stderr, "REL:%d ", alu->src[1].rel); + fprintf(stderr, "CHAN:%d ", alu->src[1].chan); + fprintf(stderr, "NEG:%d) ", alu->src[1].neg); + fprintf(stderr, "LAST:%d)\n", alu->last); + id++; + fprintf(stderr, "%04d %08X %c ", id, bc->bytecode[id], alu->last ? '*' : ' '); + fprintf(stderr, "INST:%d ", alu->inst); + fprintf(stderr, "DST(SEL:%d ", alu->dst.sel); + fprintf(stderr, "CHAN:%d ", alu->dst.chan); + fprintf(stderr, "REL:%d ", alu->dst.rel); + fprintf(stderr, "CLAMP:%d) ", alu->dst.clamp); + fprintf(stderr, "BANK_SWIZZLE:%d ", alu->bank_swizzle); + if (alu->is_op3) { + fprintf(stderr, "SRC2(SEL:%d ", alu->src[2].sel); + fprintf(stderr, "REL:%d ", alu->src[2].rel); + fprintf(stderr, "CHAN:%d ", alu->src[2].chan); + fprintf(stderr, "NEG:%d)\n", alu->src[2].neg); + } else { + fprintf(stderr, "SRC0_ABS:%d ", alu->src[0].abs); + fprintf(stderr, "SRC1_ABS:%d ", alu->src[1].abs); + fprintf(stderr, "WRITE_MASK:%d ", alu->dst.write); + fprintf(stderr, "OMOD:%d ", alu->omod); + fprintf(stderr, "EXECUTE_MASK:%d ", alu->predicate); + fprintf(stderr, "UPDATE_PRED:%d\n", alu->predicate); + } + + id++; + if (alu->last) { + for (i = 0; i < nliteral; i++, id++) { + float *f = (float*)(bc->bytecode + id); + fprintf(stderr, "%04d %08X\t%f\n", id, bc->bytecode[id], *f); + } + id += nliteral & 1; + nliteral = 0; + } + } + + LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "INST:%d ", tex->inst); + fprintf(stderr, "RESOURCE_ID:%d ", tex->resource_id); + fprintf(stderr, "SRC(GPR:%d ", tex->src_gpr); + fprintf(stderr, "REL:%d)\n", tex->src_rel); + id++; + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "DST(GPR:%d ", tex->dst_gpr); + fprintf(stderr, "REL:%d ", tex->dst_rel); + fprintf(stderr, "SEL_X:%d ", tex->dst_sel_x); + fprintf(stderr, "SEL_Y:%d ", tex->dst_sel_y); + fprintf(stderr, "SEL_Z:%d ", tex->dst_sel_z); + fprintf(stderr, "SEL_W:%d) ", tex->dst_sel_w); + fprintf(stderr, "LOD_BIAS:%d ", tex->lod_bias); + fprintf(stderr, "COORD_TYPE_X:%d ", tex->coord_type_x); + fprintf(stderr, "COORD_TYPE_Y:%d ", tex->coord_type_y); + fprintf(stderr, "COORD_TYPE_Z:%d ", tex->coord_type_z); + fprintf(stderr, "COORD_TYPE_W:%d\n", tex->coord_type_w); + id++; + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "OFFSET_X:%d ", tex->offset_x); + fprintf(stderr, "OFFSET_Y:%d ", tex->offset_y); + fprintf(stderr, "OFFSET_Z:%d ", tex->offset_z); + fprintf(stderr, "SAMPLER_ID:%d ", tex->sampler_id); + fprintf(stderr, "SRC(SEL_X:%d ", tex->src_sel_x); + fprintf(stderr, "SEL_Y:%d ", tex->src_sel_y); + fprintf(stderr, "SEL_Z:%d ", tex->src_sel_z); + fprintf(stderr, "SEL_W:%d)\n", tex->src_sel_w); + id++; + fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]); + id++; + } + + LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "INST:%d ", vtx->inst); + fprintf(stderr, "FETCH_TYPE:%d ", vtx->fetch_type); + fprintf(stderr, "BUFFER_ID:%d\n", vtx->buffer_id); + id++; + /* This assumes that no semantic fetches exist */ + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "SRC(GPR:%d ", vtx->src_gpr); + fprintf(stderr, "SEL_X:%d) ", vtx->src_sel_x); + fprintf(stderr, "MEGA_FETCH_COUNT:%d ", vtx->mega_fetch_count); + fprintf(stderr, "DST(GPR:%d ", vtx->dst_gpr); + fprintf(stderr, "SEL_X:%d ", vtx->dst_sel_x); + fprintf(stderr, "SEL_Y:%d ", vtx->dst_sel_y); + fprintf(stderr, "SEL_Z:%d ", vtx->dst_sel_z); + fprintf(stderr, "SEL_W:%d) ", vtx->dst_sel_w); + fprintf(stderr, "USE_CONST_FIELDS:%d ", vtx->use_const_fields); + fprintf(stderr, "FORMAT(DATA:%d ", vtx->data_format); + fprintf(stderr, "NUM:%d ", vtx->num_format_all); + fprintf(stderr, "COMP:%d ", vtx->format_comp_all); + fprintf(stderr, "MODE:%d)\n", vtx->srf_mode_all); + id++; + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "OFFSET:%d\n", vtx->offset); + //TODO + id++; + fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]); + id++; + } } - bytecode[i++] = S_SQ_CF_WORD0_ADDR(0); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) | - S_SQ_CF_WORD1_BARRIER(1); - - rstate = &ve->rstate; - rstate->id = R600_PIPE_STATE_FETCH_SHADER; - rstate->nregs = 0; - r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS, - r600_bo_offset(ve->fetch_shader) >> 8, - 0xFFFFFFFF, ve->fetch_shader); + + fprintf(stderr, "--------------------------------------\n"); } static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, @@ -1071,7 +1944,7 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, } switch (desc->channel[i].type) { - /* Half-floats, floats, doubles */ + /* Half-floats, floats, ints */ case UTIL_FORMAT_TYPE_FLOAT: switch (desc->channel[i].size) { case 16: @@ -1083,8 +1956,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = FMT_16_16_FLOAT; break; case 3: - *format = FMT_16_16_16_FLOAT; - break; case 4: *format = FMT_16_16_16_16_FLOAT; break; @@ -1124,8 +1995,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = FMT_8_8; break; case 3: - // *format = FMT_8_8_8; /* fails piglit draw-vertices test */ - // break; case 4: *format = FMT_8_8_8_8; break; @@ -1140,8 +2009,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = FMT_16_16; break; case 3: - // *format = FMT_16_16_16; /* fails piglit draw-vertices test */ - // break; case 4: *format = FMT_16_16_16_16; break; @@ -1184,64 +2051,21 @@ out_unknown: R600_ERR("unsupported vertex format %s\n", util_format_name(pformat)); } -static void r600_bc(unsigned ndw, unsigned chiprev, u32 *bytecode) -{ - unsigned i; - char chip = '6'; - - switch (chiprev) { - case 1: - chip = '7'; - break; - case 2: - chip = 'E'; - break; - case 0: - default: - chip = '6'; - break; - } - fprintf(stderr, "bytecode %d dw -----------------------\n", ndw); - fprintf(stderr, " %c\n", chip); - for (i = 0; i < ndw; i++) { - fprintf(stderr, "0x%08X\n", bytecode[i]); - } - fprintf(stderr, "--------------------------------------\n"); -} - int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve) { - unsigned ndw, i; - u32 *bytecode; - unsigned fetch_resource_start = 0, format, num_format, format_comp; + static int dump_shaders = -1; + + struct r600_bc bc; + struct r600_bc_vtx vtx; struct pipe_vertex_element *elements = ve->elements; const struct util_format_description *desc; - - /* 2 dwords for cf aligned to 4 + 4 dwords per input */ - ndw = 8 + ve->count * 4; - ve->fs_size = ndw * 4; - - /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */ - ve->fetch_shader = r600_bo(rctx->radeon, ndw*4, 256, PIPE_BIND_VERTEX_BUFFER, 0); - if (ve->fetch_shader == NULL) { - return -ENOMEM; - } - - bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL); - if (bytecode == NULL) { - r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL); - return -ENOMEM; - } - - if (rctx->family >= CHIP_CEDAR) { - eg_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4); - } else { - r600_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4); - fetch_resource_start = 160; - } + unsigned fetch_resource_start = rctx->family >= CHIP_CEDAR ? 0 : 160; + unsigned format, num_format, format_comp; + u32 *bytecode; + int i, r; /* vertex elements offset need special handling, if offset is bigger - * than what we can put in fetch instruction then we need to alterate + + * than what we can put in fetch instruction then we need to alterate * the vertex resource offset. In such case in order to simplify code * we will bound one resource per elements. It's a worst case scenario. */ @@ -1252,40 +2076,111 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru } } + memset(&bc, 0, sizeof(bc)); + r = r600_bc_init(&bc, r600_get_family(rctx->radeon)); + if (r) + return r; + + for (i = 0; i < ve->count; i++) { + if (elements[i].instance_divisor > 1) { + struct r600_bc_alu alu; + + memset(&alu, 0, sizeof(alu)); + alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); + alu.src[0].sel = 0; + alu.src[0].chan = 3; + + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1; + + alu.dst.sel = i + 1; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(&bc, &alu))) { + r600_bc_clear(&bc); + return r; + } + } + } + for (i = 0; i < ve->count; i++) { unsigned vbuffer_index; - r600_vertex_data_type(ve->hw_format[i], &format, &num_format, &format_comp); - desc = util_format_description(ve->hw_format[i]); + r600_vertex_data_type(ve->elements[i].src_format, &format, &num_format, &format_comp); + desc = util_format_description(ve->elements[i].src_format); if (desc == NULL) { - R600_ERR("unknown format %d\n", ve->hw_format[i]); - r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL); + r600_bc_clear(&bc); + R600_ERR("unknown format %d\n", ve->elements[i].src_format); return -EINVAL; } /* see above for vbuffer_need_offset explanation */ vbuffer_index = elements[i].vertex_buffer_index; - if (ve->vbuffer_need_offset) { - bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(i + fetch_resource_start); - } else { - bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(vbuffer_index + fetch_resource_start); + memset(&vtx, 0, sizeof(vtx)); + vtx.buffer_id = (ve->vbuffer_need_offset ? i : vbuffer_index) + fetch_resource_start; + vtx.fetch_type = elements[i].instance_divisor ? 1 : 0; + vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0; + vtx.src_sel_x = elements[i].instance_divisor ? 3 : 0; + vtx.mega_fetch_count = 0x1F; + vtx.dst_gpr = i + 1; + vtx.dst_sel_x = desc->swizzle[0]; + vtx.dst_sel_y = desc->swizzle[1]; + vtx.dst_sel_z = desc->swizzle[2]; + vtx.dst_sel_w = desc->swizzle[3]; + vtx.data_format = format; + vtx.num_format_all = num_format; + vtx.format_comp_all = format_comp; + vtx.srf_mode_all = 1; + vtx.offset = elements[i].src_offset; + + if ((r = r600_bc_add_vtx(&bc, &vtx))) { + r600_bc_clear(&bc); + return r; } - bytecode[8 + i * 4 + 0] |= S_SQ_VTX_WORD0_SRC_GPR(0) | - S_SQ_VTX_WORD0_SRC_SEL_X(0) | - S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(0x1F); - bytecode[8 + i * 4 + 1] = S_SQ_VTX_WORD1_DST_SEL_X(desc->swizzle[0]) | - S_SQ_VTX_WORD1_DST_SEL_Y(desc->swizzle[1]) | - S_SQ_VTX_WORD1_DST_SEL_Z(desc->swizzle[2]) | - S_SQ_VTX_WORD1_DST_SEL_W(desc->swizzle[3]) | - S_SQ_VTX_WORD1_USE_CONST_FIELDS(0) | - S_SQ_VTX_WORD1_DATA_FORMAT(format) | - S_SQ_VTX_WORD1_NUM_FORMAT_ALL(num_format) | - S_SQ_VTX_WORD1_FORMAT_COMP_ALL(format_comp) | - S_SQ_VTX_WORD1_SRF_MODE_ALL(1) | - S_SQ_VTX_WORD1_GPR_DST_GPR(i + 1); - bytecode[8 + i * 4 + 2] = S_SQ_VTX_WORD2_OFFSET(elements[i].src_offset) | - S_SQ_VTX_WORD2_MEGA_FETCH(1); - bytecode[8 + i * 4 + 3] = 0; } + + r600_bc_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN)); + + if ((r = r600_bc_build(&bc))) { + r600_bc_clear(&bc); + return r; + } + + if (dump_shaders == -1) + dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); + + if (dump_shaders) { + fprintf(stderr, "--------------------------------------------------------------\n"); + r600_bc_dump(&bc); + fprintf(stderr, "______________________________________________________________\n"); + } + + ve->fs_size = bc.ndw*4; + + /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */ + ve->fetch_shader = r600_bo(rctx->radeon, ve->fs_size, 256, PIPE_BIND_VERTEX_BUFFER, 0); + if (ve->fetch_shader == NULL) { + r600_bc_clear(&bc); + return -ENOMEM; + } + + bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL); + if (bytecode == NULL) { + r600_bc_clear(&bc); + r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL); + return -ENOMEM; + } + + memcpy(bytecode, bc.bytecode, ve->fs_size); + r600_bo_unmap(rctx->radeon, ve->fetch_shader); + r600_bc_clear(&bc); + + if (rctx->family >= CHIP_CEDAR) + evergreen_fetch_shader(ve); + else + r600_fetch_shader(ve); + return 0; } diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index b147f0f5c8..27ea293ebe 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -25,9 +25,6 @@ #include "util/u_double_list.h" -#define NUM_OF_CYCLES 3 -#define NUM_OF_COMPONENTS 4 - struct r600_vertex_element; struct r600_pipe_context; @@ -37,6 +34,7 @@ struct r600_bc_alu_src { unsigned neg; unsigned abs; unsigned rel; + uint32_t value; }; struct r600_bc_alu_dst { @@ -49,19 +47,15 @@ struct r600_bc_alu_dst { struct r600_bc_alu { struct list_head list; - struct list_head bs_list; /* bank swizzle list */ struct r600_bc_alu_src src[3]; struct r600_bc_alu_dst dst; unsigned inst; unsigned last; unsigned is_op3; unsigned predicate; - unsigned nliteral; - unsigned literal_added; unsigned bank_swizzle; unsigned bank_swizzle_force; - u32 value[4]; - int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS]; + unsigned omod; }; struct r600_bc_tex { @@ -109,6 +103,7 @@ struct r600_bc_vtx { unsigned num_format_all; unsigned format_comp_all; unsigned srf_mode_all; + unsigned offset; }; struct r600_bc_output { @@ -122,9 +117,16 @@ struct r600_bc_output { unsigned swizzle_y; unsigned swizzle_z; unsigned swizzle_w; + unsigned burst_count; unsigned barrier; }; +struct r600_bc_kcache { + unsigned bank; + unsigned mode; + unsigned addr; +}; + struct r600_bc_cf { struct list_head list; unsigned inst; @@ -134,18 +136,15 @@ struct r600_bc_cf { unsigned cond; unsigned pop_count; unsigned cf_addr; /* control flow addr */ - unsigned kcache0_mode; - unsigned kcache1_mode; - unsigned kcache0_addr; - unsigned kcache1_addr; - unsigned kcache0_bank; - unsigned kcache1_bank; + struct r600_bc_kcache kcache[2]; unsigned r6xx_uses_waterfall; struct list_head alu; struct list_head tex; struct list_head vtx; struct r600_bc_output output; struct r600_bc_alu *curr_bs_head; + struct r600_bc_alu *prev_bs_head; + struct r600_bc_alu *prev2_bs_head; }; #define FC_NONE 0 @@ -191,26 +190,24 @@ struct r600_bc { /* eg_asm.c */ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf); -void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count); /* r600_asm.c */ int r600_bc_init(struct r600_bc *bc, enum radeon_family family); void r600_bc_clear(struct r600_bc *bc); int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu); -int r600_bc_add_literal(struct r600_bc *bc, const u32 *value); int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx); int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex); int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output); int r600_bc_build(struct r600_bc *bc); int r600_bc_add_cfinst(struct r600_bc *bc, int inst); int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type); +void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg); void r600_bc_dump(struct r600_bc *bc); -void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count); -void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count); int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve); /* r700_asm.c */ +void r700_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf); int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id); #endif diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 0f04136fb2..04408a5cc8 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -36,6 +36,7 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + rctx->blit = true; r600_context_queries_suspend(&rctx->ctx); util_blitter_save_blend(rctx->blitter, rctx->states[R600_PIPE_STATE_BLEND]); @@ -53,9 +54,9 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op if (rctx->states[R600_PIPE_STATE_CLIP]) { util_blitter_save_clip(rctx->blitter, &rctx->clip); } - util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffer, rctx->vertex_buffer); - - rctx->vertex_elements = NULL; + util_blitter_save_vertex_buffers(rctx->blitter, + rctx->vbuf_mgr->nr_vertex_buffers, + rctx->vbuf_mgr->vertex_buffer); if (op & (R600_CLEAR_SURFACE | R600_COPY)) util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer); @@ -76,21 +77,26 @@ static void r600_blitter_end(struct pipe_context *ctx) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; r600_context_queries_resume(&rctx->ctx); + rctx->blit = false; } -int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture) +void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct pipe_surface *zsurf, *cbsurf, surf_tmpl; int level = 0; float depth = 1.0f; - surf_tmpl.format = texture->resource.base.b.format; + + if (!texture->dirty_db) + return; + + surf_tmpl.format = texture->resource.b.b.b.format; surf_tmpl.u.tex.level = level; surf_tmpl.u.tex.first_layer = 0; surf_tmpl.u.tex.last_layer = 0; surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL; - zsurf = ctx->create_surface(ctx, &texture->resource.base.b, &surf_tmpl); + zsurf = ctx->create_surface(ctx, &texture->resource.b.b.b, &surf_tmpl); surf_tmpl.format = ((struct pipe_resource*)texture->flushed_depth_texture)->format; surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; @@ -108,8 +114,47 @@ int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_te pipe_surface_reference(&zsurf, NULL); pipe_surface_reference(&cbsurf, NULL); + texture->dirty_db = FALSE; +} + +void r600_flush_depth_textures(struct r600_pipe_context *rctx) +{ + unsigned int i; + + if (rctx->blit) return; + + /* FIXME: This handles fragment shader textures only. */ + + for (i = 0; i < rctx->ps_samplers.n_views; ++i) { + struct r600_pipe_sampler_view *view; + struct r600_resource_texture *tex; + + view = rctx->ps_samplers.views[i]; + if (!view) continue; + + tex = (struct r600_resource_texture *)view->base.texture; + if (!tex->depth) + continue; + + if (tex->is_flushing_texture) + continue; - return 0; + r600_blit_uncompress_depth(&rctx->context, tex); + } + + /* also check CB here */ + for (i = 0; i < rctx->framebuffer.nr_cbufs; i++) { + struct r600_resource_texture *tex; + tex = (struct r600_resource_texture *)rctx->framebuffer.cbufs[i]->texture; + + if (!tex->depth) + continue; + + if (tex->is_flushing_texture) + continue; + + r600_blit_uncompress_depth(&rctx->context, tex); + } } static void r600_clear(struct pipe_context *ctx, unsigned buffers, @@ -174,6 +219,52 @@ static void r600_hw_copy_region(struct pipe_context *ctx, r600_blitter_end(ctx); } +struct texture_orig_info { + unsigned format; + unsigned width0; + unsigned height0; +}; + +static void r600_compressed_to_blittable(struct pipe_resource *tex, + unsigned level, + struct texture_orig_info *orig) +{ + struct r600_resource_texture *rtex = (struct r600_resource_texture*)tex; + unsigned pixsize = util_format_get_blocksize(tex->format); + int new_format; + int new_height, new_width; + + orig->format = tex->format; + orig->width0 = tex->width0; + orig->height0 = tex->height0; + + if (pixsize == 8) + new_format = PIPE_FORMAT_R16G16B16A16_UNORM; /* 64-bit block */ + else + new_format = PIPE_FORMAT_R32G32B32A32_UNORM; /* 128-bit block */ + + new_width = util_format_get_nblocksx(tex->format, orig->width0); + new_height = util_format_get_nblocksy(tex->format, orig->height0); + + rtex->force_int_type = true; + tex->width0 = new_width; + tex->height0 = new_height; + tex->format = new_format; + +} + +static void r600_reset_blittable_to_compressed(struct pipe_resource *tex, + unsigned level, + struct texture_orig_info *orig) +{ + struct r600_resource_texture *rtex = (struct r600_resource_texture*)tex; + rtex->force_int_type = false; + + tex->format = orig->format; + tex->width0 = orig->width0; + tex->height0 = orig->height0; +} + static void r600_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst, unsigned dst_level, @@ -182,15 +273,36 @@ static void r600_resource_copy_region(struct pipe_context *ctx, unsigned src_level, const struct pipe_box *src_box) { - boolean is_depth; - /* there is something wrong with depth resource copies at the moment so avoid them for now */ - is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; - if (is_depth) - util_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, - src, src_level, src_box); - else - r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, - src, src_level, src_box); + struct r600_resource_texture *rsrc = (struct r600_resource_texture*)src; + struct texture_orig_info orig_info[2]; + boolean restore_orig[2]; + + if (rsrc->depth && !rsrc->is_flushing_texture) + r600_texture_depth_flush(ctx, src, FALSE); + + restore_orig[0] = restore_orig[1] = FALSE; + + if (util_format_is_compressed(src->format)) { + r600_compressed_to_blittable(src, src_level, &orig_info[0]); + restore_orig[0] = TRUE; + } + + if (util_format_is_compressed(dst->format)) { + r600_compressed_to_blittable(dst, dst_level, &orig_info[1]); + restore_orig[1] = TRUE; + /* translate the dst box as well */ + dstx = util_format_get_nblocksx(orig_info[1].format, dstx); + dsty = util_format_get_nblocksy(orig_info[1].format, dsty); + } + + r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); + + if (restore_orig[0]) + r600_reset_blittable_to_compressed(src, src_level, &orig_info[0]); + + if (restore_orig[1]) + r600_reset_blittable_to_compressed(dst, dst_level, &orig_info[1]); } void r600_init_blit_functions(struct r600_pipe_context *rctx) @@ -200,3 +312,19 @@ void r600_init_blit_functions(struct r600_pipe_context *rctx) rctx->context.clear_depth_stencil = r600_clear_depth_stencil; rctx->context.resource_copy_region = r600_resource_copy_region; } + +void r600_blit_push_depth(struct pipe_context *ctx, struct r600_resource_texture *texture) +{ + struct pipe_box sbox; + + sbox.x = sbox.y = sbox.z = 0; + sbox.width = texture->resource.b.b.b.width0; + sbox.height = texture->resource.b.b.b.height0; + /* XXX that might be wrong */ + sbox.depth = 1; + + r600_hw_copy_region(ctx, (struct pipe_resource *)texture, 0, + 0, 0, 0, + (struct pipe_resource *)texture->flushed_depth_texture, 0, + &sbox); +} diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 03a61a3213..6ced719c8f 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -29,83 +29,50 @@ #include <util/u_math.h> #include <util/u_inlines.h> #include <util/u_memory.h> -#include <util/u_upload_mgr.h> +#include "util/u_upload_mgr.h" + #include "state_tracker/drm_driver.h" + #include <xf86drm.h> #include "radeon_drm.h" + #include "r600.h" #include "r600_pipe.h" -extern struct u_resource_vtbl r600_buffer_vtbl; - - -struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, - const struct pipe_resource *templ) -{ - struct r600_resource_buffer *rbuffer; - struct r600_bo *bo; - /* XXX We probably want a different alignment for buffers and textures. */ - unsigned alignment = 4096; - - rbuffer = CALLOC_STRUCT(r600_resource_buffer); - if (rbuffer == NULL) - return NULL; - - rbuffer->magic = R600_BUFFER_MAGIC; - rbuffer->user_buffer = NULL; - rbuffer->num_ranges = 0; - rbuffer->r.base.b = *templ; - pipe_reference_init(&rbuffer->r.base.b.reference, 1); - rbuffer->r.base.b.screen = screen; - rbuffer->r.base.vtbl = &r600_buffer_vtbl; - rbuffer->r.size = rbuffer->r.base.b.width0; - bo = r600_bo((struct radeon*)screen->winsys, rbuffer->r.base.b.width0, alignment, rbuffer->r.base.b.bind, rbuffer->r.base.b.usage); - if (bo == NULL) { - FREE(rbuffer); - return NULL; - } - rbuffer->r.bo = bo; - return &rbuffer->r.base.b; -} - -struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, - void *ptr, unsigned bytes, - unsigned bind) -{ - struct r600_resource_buffer *rbuffer; - - rbuffer = CALLOC_STRUCT(r600_resource_buffer); - if (rbuffer == NULL) - return NULL; - - rbuffer->magic = R600_BUFFER_MAGIC; - pipe_reference_init(&rbuffer->r.base.b.reference, 1); - rbuffer->r.base.vtbl = &r600_buffer_vtbl; - rbuffer->r.base.b.screen = screen; - rbuffer->r.base.b.target = PIPE_BUFFER; - rbuffer->r.base.b.format = PIPE_FORMAT_R8_UNORM; - rbuffer->r.base.b.usage = PIPE_USAGE_IMMUTABLE; - rbuffer->r.base.b.bind = bind; - rbuffer->r.base.b.width0 = bytes; - rbuffer->r.base.b.height0 = 1; - rbuffer->r.base.b.depth0 = 1; - rbuffer->r.base.b.array_size = 1; - rbuffer->r.base.b.flags = 0; - rbuffer->num_ranges = 0; - rbuffer->r.bo = NULL; - rbuffer->user_buffer = ptr; - return &rbuffer->r.base.b; -} - static void r600_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *buf) { + struct r600_screen *rscreen = (struct r600_screen*)screen; struct r600_resource_buffer *rbuffer = r600_buffer(buf); if (rbuffer->r.bo) { r600_bo_reference((struct radeon*)screen->winsys, &rbuffer->r.bo, NULL); } - FREE(rbuffer); + rbuffer->r.bo = NULL; + util_slab_free(&rscreen->pool_buffers, rbuffer); +} + +static struct pipe_transfer *r600_get_transfer(struct pipe_context *ctx, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context*)ctx; + struct pipe_transfer *transfer = util_slab_alloc(&rctx->pool_transfers); + + transfer->resource = resource; + transfer->level = level; + transfer->usage = usage; + transfer->box = *box; + transfer->stride = 0; + transfer->layer_stride = 0; + transfer->data = NULL; + + /* Note strides are zero, this is ok for buffers, but not for + * textures 2d & higher at least. + */ + return transfer; } static void *r600_buffer_transfer_map(struct pipe_context *pipe, @@ -114,29 +81,10 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe, struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); int write = 0; uint8_t *data; - int i; - boolean flush = FALSE; - - if (rbuffer->user_buffer) - return (uint8_t*)rbuffer->user_buffer + transfer->box.x; - - if (transfer->usage & PIPE_TRANSFER_DISCARD) { - for (i = 0; i < rbuffer->num_ranges; i++) { - if ((transfer->box.x >= rbuffer->ranges[i].start) && - (transfer->box.x < rbuffer->ranges[i].end)) - flush = TRUE; - - if (flush) { - r600_bo_reference((struct radeon*)pipe->winsys, &rbuffer->r.bo, NULL); - rbuffer->num_ranges = 0; - rbuffer->r.bo = r600_bo((struct radeon*)pipe->winsys, - rbuffer->r.base.b.width0, 0, - rbuffer->r.base.b.bind, - rbuffer->r.base.b.usage); - break; - } - } - } + + if (rbuffer->r.b.user_ptr) + return (uint8_t*)rbuffer->r.b.user_ptr + transfer->box.x; + if (transfer->usage & PIPE_TRANSFER_DONTBLOCK) { /* FIXME */ } @@ -155,44 +103,122 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe, { struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); + if (rbuffer->r.b.user_ptr) + return; + if (rbuffer->r.bo) r600_bo_unmap((struct radeon*)pipe->winsys, rbuffer->r.bo); } static void r600_buffer_transfer_flush_region(struct pipe_context *pipe, - struct pipe_transfer *transfer, - const struct pipe_box *box) + struct pipe_transfer *transfer, + const struct pipe_box *box) { - struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); - unsigned i; - unsigned offset = transfer->box.x + box->x; - unsigned length = box->width; +} - assert(box->x + box->width <= transfer->box.width); +static void r600_transfer_destroy(struct pipe_context *ctx, + struct pipe_transfer *transfer) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context*)ctx; + util_slab_free(&rctx->pool_transfers, transfer); +} - if (rbuffer->user_buffer) - return; +static void r600_buffer_transfer_inline_write(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride) +{ + struct radeon *ws = (struct radeon*)pipe->winsys; + struct r600_resource_buffer *rbuffer = r600_buffer(resource); + uint8_t *map = NULL; - /* mark the range as used */ - for(i = 0; i < rbuffer->num_ranges; ++i) { - if(offset <= rbuffer->ranges[i].end && rbuffer->ranges[i].start <= (offset+box->width)) { - rbuffer->ranges[i].start = MIN2(rbuffer->ranges[i].start, offset); - rbuffer->ranges[i].end = MAX2(rbuffer->ranges[i].end, (offset+length)); - return; - } - } + assert(rbuffer->r.b.user_ptr == NULL); + + map = r600_bo_map(ws, rbuffer->r.bo, + PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage, + pipe); + + memcpy(map + box->x, data, box->width); - rbuffer->ranges[rbuffer->num_ranges].start = offset; - rbuffer->ranges[rbuffer->num_ranges].end = offset+length; - rbuffer->num_ranges++; + if (rbuffer->r.bo) + r600_bo_unmap(ws, rbuffer->r.bo); } -unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, - struct pipe_resource *buf, - unsigned level, int layer) +static const struct u_resource_vtbl r600_buffer_vtbl = { - /* FIXME */ - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + u_default_resource_get_handle, /* get_handle */ + r600_buffer_destroy, /* resource_destroy */ + r600_get_transfer, /* get_transfer */ + r600_transfer_destroy, /* transfer_destroy */ + r600_buffer_transfer_map, /* transfer_map */ + r600_buffer_transfer_flush_region, /* transfer_flush_region */ + r600_buffer_transfer_unmap, /* transfer_unmap */ + r600_buffer_transfer_inline_write /* transfer_inline_write */ +}; + +struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, + const struct pipe_resource *templ) +{ + struct r600_screen *rscreen = (struct r600_screen*)screen; + struct r600_resource_buffer *rbuffer; + struct r600_bo *bo; + /* XXX We probably want a different alignment for buffers and textures. */ + unsigned alignment = 4096; + + rbuffer = util_slab_alloc(&rscreen->pool_buffers); + + rbuffer->magic = R600_BUFFER_MAGIC; + rbuffer->r.b.b.b = *templ; + pipe_reference_init(&rbuffer->r.b.b.b.reference, 1); + rbuffer->r.b.b.b.screen = screen; + rbuffer->r.b.b.vtbl = &r600_buffer_vtbl; + rbuffer->r.b.user_ptr = NULL; + rbuffer->r.size = rbuffer->r.b.b.b.width0; + rbuffer->r.bo_size = rbuffer->r.size; + + bo = r600_bo((struct radeon*)screen->winsys, + rbuffer->r.b.b.b.width0, + alignment, rbuffer->r.b.b.b.bind, + rbuffer->r.b.b.b.usage); + + if (bo == NULL) { + FREE(rbuffer); + return NULL; + } + rbuffer->r.bo = bo; + return &rbuffer->r.b.b.b; +} + +struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, + void *ptr, unsigned bytes, + unsigned bind) +{ + struct r600_screen *rscreen = (struct r600_screen*)screen; + struct r600_resource_buffer *rbuffer; + + rbuffer = util_slab_alloc(&rscreen->pool_buffers); + + rbuffer->magic = R600_BUFFER_MAGIC; + pipe_reference_init(&rbuffer->r.b.b.b.reference, 1); + rbuffer->r.b.b.vtbl = &r600_buffer_vtbl; + rbuffer->r.b.b.b.screen = screen; + rbuffer->r.b.b.b.target = PIPE_BUFFER; + rbuffer->r.b.b.b.format = PIPE_FORMAT_R8_UNORM; + rbuffer->r.b.b.b.usage = PIPE_USAGE_IMMUTABLE; + rbuffer->r.b.b.b.bind = bind; + rbuffer->r.b.b.b.width0 = bytes; + rbuffer->r.b.b.b.height0 = 1; + rbuffer->r.b.b.b.depth0 = 1; + rbuffer->r.b.b.b.array_size = 1; + rbuffer->r.b.b.b.flags = 0; + rbuffer->r.b.user_ptr = ptr; + rbuffer->r.bo = NULL; + rbuffer->r.bo_size = 0; + return &rbuffer->r.b.b.b; } struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, @@ -213,82 +239,39 @@ struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, return NULL; } - pipe_reference_init(&rbuffer->base.b.reference, 1); - rbuffer->base.b.target = PIPE_BUFFER; - rbuffer->base.b.screen = screen; - rbuffer->base.vtbl = &r600_buffer_vtbl; + pipe_reference_init(&rbuffer->b.b.b.reference, 1); + rbuffer->b.b.b.target = PIPE_BUFFER; + rbuffer->b.b.b.screen = screen; + rbuffer->b.b.vtbl = &r600_buffer_vtbl; rbuffer->bo = bo; - return &rbuffer->base.b; + return &rbuffer->b.b.b; } -struct u_resource_vtbl r600_buffer_vtbl = +void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw) { - u_default_resource_get_handle, /* get_handle */ - r600_buffer_destroy, /* resource_destroy */ - r600_buffer_is_referenced_by_cs, /* is_buffer_referenced */ - u_default_get_transfer, /* get_transfer */ - u_default_transfer_destroy, /* transfer_destroy */ - r600_buffer_transfer_map, /* transfer_map */ - r600_buffer_transfer_flush_region, /* transfer_flush_region */ - r600_buffer_transfer_unmap, /* transfer_unmap */ - u_default_transfer_inline_write /* transfer_inline_write */ -}; + struct r600_resource_buffer *rbuffer = r600_buffer(draw->index_buffer); + boolean flushed; + + u_upload_data(rctx->vbuf_mgr->uploader, 0, + draw->info.count * draw->index_size, + rbuffer->r.b.user_ptr, + &draw->index_buffer_offset, + &draw->index_buffer, &flushed); +} -int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw) +void r600_upload_const_buffer(struct r600_pipe_context *rctx, struct r600_resource_buffer **rbuffer, + uint32_t *const_offset) { - struct pipe_resource *upload_buffer = NULL; - unsigned index_offset = draw->index_buffer_offset; - int ret = 0; - - if (r600_buffer_is_user_buffer(draw->index_buffer)) { - ret = u_upload_buffer(rctx->upload_ib, - index_offset, - draw->count * draw->index_size, - draw->index_buffer, - &index_offset, - &upload_buffer); - if (ret) { - goto done; - } - draw->index_buffer_offset = index_offset; - - /* Transfer ownership. */ - pipe_resource_reference(&draw->index_buffer, upload_buffer); - pipe_resource_reference(&upload_buffer, NULL); - } + if ((*rbuffer)->r.b.user_ptr) { + uint8_t *ptr = (*rbuffer)->r.b.user_ptr; + unsigned size = (*rbuffer)->r.b.b.b.width0; + boolean flushed; -done: - return ret; -} + *rbuffer = NULL; -int r600_upload_user_buffers(struct r600_pipe_context *rctx) -{ - enum pipe_error ret = PIPE_OK; - int i, nr; - - nr = rctx->vertex_elements->count; - nr = rctx->nvertex_buffer; - - for (i = 0; i < nr; i++) { -// struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[rctx->vertex_elements->elements[i].vertex_buffer_index]; - struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; - - if (r600_buffer_is_user_buffer(vb->buffer)) { - struct pipe_resource *upload_buffer = NULL; - unsigned offset = 0; /*vb->buffer_offset * 4;*/ - unsigned size = vb->buffer->width0; - unsigned upload_offset; - ret = u_upload_buffer(rctx->upload_vb, - offset, size, - vb->buffer, - &upload_offset, &upload_buffer); - if (ret) - return ret; - - pipe_resource_reference(&vb->buffer, NULL); - vb->buffer = upload_buffer; - vb->buffer_offset = upload_offset; - } + u_upload_data(rctx->vbuf_mgr->uploader, 0, size, ptr, const_offset, + (struct pipe_resource**)rbuffer, &flushed); + } else { + *const_offset = 0; } - return ret; } diff --git a/src/gallium/drivers/r600/r600_opcodes.h b/src/gallium/drivers/r600/r600_opcodes.h index 2ee0c83e5d..a85d0bbf1e 100644 --- a/src/gallium/drivers/r600/r600_opcodes.h +++ b/src/gallium/drivers/r600/r600_opcodes.h @@ -330,10 +330,14 @@ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED_64 0x00000098 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_64 0x00000099 /* TODO Fill in more ALU */ +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT 0x0000009B +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT 0x0000009C +/* TODO Fill in more ALU */ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR 0x000000B1 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 0x000000BE #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE 0x000000BF #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE 0x000000C0 +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4 0x000000C1 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT 0x000000CC #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY 0x000000D6 diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 6842571044..0e28bda6eb 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -30,12 +30,13 @@ #include <tgsi/tgsi_util.h> #include <util/u_blitter.h> #include <util/u_double_list.h> +#include <util/u_format_s3tc.h> #include <util/u_transfer.h> #include <util/u_surface.h> #include <util/u_pack_color.h> #include <util/u_memory.h> #include <util/u_inlines.h> -#include <util/u_upload_mgr.h> +#include "util/u_upload_mgr.h" #include <pipebuffer/pb_buffer.h> #include "r600.h" #include "r600d.h" @@ -47,7 +48,7 @@ /* * pipe_context */ -static void r600_flush(struct pipe_context *ctx, unsigned flags, +static void r600_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; @@ -59,9 +60,6 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, if (!rctx->ctx.pm4_cdwords) return; - u_upload_flush(rctx->upload_vb); - u_upload_flush(rctx->upload_ib); - #if 0 sprintf(dname, "gallium-%08d.bof", dc); if (dc < 20) { @@ -71,6 +69,30 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, dc++; #endif r600_context_flush(&rctx->ctx); + + /* XXX This shouldn't be really necessary, but removing it breaks some tests. + * Needless buffer reallocations may significantly increase memory consumption, + * so getting rid of this call is important. */ + u_upload_flush(rctx->vbuf_mgr->uploader); +} + +static void r600_update_num_contexts(struct r600_screen *rscreen, int diff) +{ + pipe_mutex_lock(rscreen->mutex_num_contexts); + if (diff > 0) { + rscreen->num_contexts++; + + if (rscreen->num_contexts > 1) + util_slab_set_thread_safety(&rscreen->pool_buffers, + UTIL_SLAB_MULTITHREADED); + } else { + rscreen->num_contexts--; + + if (rscreen->num_contexts <= 1) + util_slab_set_thread_safety(&rscreen->pool_buffers, + UTIL_SLAB_SINGLETHREADED); + } + pipe_mutex_unlock(rscreen->mutex_num_contexts); } static void r600_destroy_context(struct pipe_context *context) @@ -79,8 +101,6 @@ static void r600_destroy_context(struct pipe_context *context) rctx->context.delete_depth_stencil_alpha_state(&rctx->context, rctx->custom_dsa_flush); - r600_end_vertex_translate(rctx); - r600_context_fini(&rctx->ctx); util_blitter_destroy(rctx->blitter); @@ -89,14 +109,11 @@ static void r600_destroy_context(struct pipe_context *context) free(rctx->states[i]); } - u_upload_destroy(rctx->upload_vb); - u_upload_destroy(rctx->upload_ib); + u_vbuf_mgr_destroy(rctx->vbuf_mgr); + util_slab_destroy(&rctx->pool_transfers); - if (rctx->tran.translate_cache) - translate_cache_destroy(rctx->tran.translate_cache); + r600_update_num_contexts(rctx->screen, -1); - FREE(rctx->ps_resource); - FREE(rctx->vs_resource); FREE(rctx); } @@ -108,6 +125,9 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void if (rctx == NULL) return NULL; + + r600_update_num_contexts(rscreen, 1); + rctx->context.winsys = rscreen->screen.winsys; rctx->context.screen = screen; rctx->context.priv = priv; @@ -123,6 +143,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void r600_init_query_functions(rctx); r600_init_context_resource_functions(rctx); r600_init_surface_functions(rctx); + rctx->context.draw_vbo = r600_draw_vbo; switch (r600_get_family(rctx->radeon)) { case CHIP_R600: @@ -137,7 +158,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void case CHIP_RV730: case CHIP_RV710: case CHIP_RV740: - rctx->context.draw_vbo = r600_draw_vbo; r600_init_state_functions(rctx); if (r600_context_init(&rctx->ctx, rctx->radeon)) { r600_destroy_context(&rctx->context); @@ -151,7 +171,9 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void case CHIP_CYPRESS: case CHIP_HEMLOCK: case CHIP_PALM: - rctx->context.draw_vbo = evergreen_draw; + case CHIP_BARTS: + case CHIP_TURKS: + case CHIP_CAICOS: evergreen_init_state_functions(rctx); if (evergreen_context_init(&rctx->ctx, rctx->radeon)) { r600_destroy_context(&rctx->context); @@ -165,41 +187,23 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void return NULL; } - rctx->upload_ib = u_upload_create(&rctx->context, 32 * 1024, 16, - PIPE_BIND_INDEX_BUFFER); - if (rctx->upload_ib == NULL) { - r600_destroy_context(&rctx->context); - return NULL; - } + util_slab_create(&rctx->pool_transfers, + sizeof(struct pipe_transfer), 64, + UTIL_SLAB_SINGLETHREADED); - rctx->upload_vb = u_upload_create(&rctx->context, 128 * 1024, 16, - PIPE_BIND_VERTEX_BUFFER); - if (rctx->upload_vb == NULL) { + rctx->vbuf_mgr = u_vbuf_mgr_create(&rctx->context, 1024 * 1024, 256, + PIPE_BIND_VERTEX_BUFFER | + PIPE_BIND_INDEX_BUFFER | + PIPE_BIND_CONSTANT_BUFFER, + U_VERTEX_FETCH_DWORD_ALIGNED); + if (!rctx->vbuf_mgr) { r600_destroy_context(&rctx->context); return NULL; } rctx->blitter = util_blitter_create(&rctx->context); if (rctx->blitter == NULL) { - FREE(rctx); - return NULL; - } - - rctx->tran.translate_cache = translate_cache_create(); - if (rctx->tran.translate_cache == NULL) { - FREE(rctx); - return NULL; - } - - rctx->vs_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state)); - if (!rctx->vs_resource) { - FREE(rctx); - return NULL; - } - - rctx->ps_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state)); - if (!rctx->ps_resource) { - FREE(rctx); + r600_destroy_context(&rctx->context); return NULL; } @@ -209,8 +213,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void else rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx); - r600_blit_uncompress_depth_ptr = r600_blit_uncompress_depth; - return &rctx->context; } @@ -243,6 +245,9 @@ static const char *r600_get_family_name(enum radeon_family family) case CHIP_CYPRESS: return "AMD CYPRESS"; case CHIP_HEMLOCK: return "AMD HEMLOCK"; case CHIP_PALM: return "AMD PALM"; + case CHIP_BARTS: return "AMD BARTS"; + case CHIP_TURKS: return "AMD TURKS"; + case CHIP_CAICOS: return "AMD CAICOS"; default: return "AMD unknown"; } } @@ -275,18 +280,32 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_BLEND_EQUATION_SEPARATE: case PIPE_CAP_SM3: case PIPE_CAP_TEXTURE_SWIZZLE: - case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_DEPTH_CLAMP: case PIPE_CAP_SHADER_STENCIL_EXPORT: + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: return 1; + case PIPE_CAP_INDEP_BLEND_ENABLE: + /* R600 doesn't support per-MRT blends */ + if (family == CHIP_R600) + return 0; + else + return 1; /* Unsupported features (boolean caps). */ - case PIPE_CAP_TIMER_QUERY: case PIPE_CAP_STREAM_OUTPUT: case PIPE_CAP_PRIMITIVE_RESTART: case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */ - return 0; + /* R600 doesn't support per-MRT blends */ + if (family == CHIP_R600) + return 0; + else + return 0; + + case PIPE_CAP_ARRAY_TEXTURES: + /* fix once the CS checker upstream is fixed */ + return debug_get_bool_option("R600_ARRAY_TEXTURE", FALSE); /* Texturing. */ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: @@ -316,6 +335,10 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: return 0; + /* Timer queries, present when the clock frequency is non zero. */ + case PIPE_CAP_TIMER_QUERY: + return r600_get_clock_crystal_freq(rscreen->radeon) != 0; + default: R600_ERR("r600: unknown param %d\n", param); return 0; @@ -380,9 +403,9 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e case PIPE_SHADER_CAP_MAX_ADDRS: return 1; //max native address registers/* FIXME Isn't this equal to TEMPS? */ case PIPE_SHADER_CAP_MAX_CONSTS: - return 256; //max native parameters + return R600_MAX_CONST_BUFFER_SIZE; case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: - return 1; + return R600_MAX_CONST_BUFFERS; case PIPE_SHADER_CAP_MAX_PREDS: return 0; /* FIXME */ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: @@ -403,8 +426,7 @@ static boolean r600_is_format_supported(struct pipe_screen* screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned usage, - unsigned geom_flags) + unsigned usage) { unsigned retval = 0; if (target >= PIPE_MAX_TEXTURE_TYPES) { @@ -417,7 +439,7 @@ static boolean r600_is_format_supported(struct pipe_screen* screen, return FALSE; if ((usage & PIPE_BIND_SAMPLER_VIEW) && - r600_is_sampler_format_supported(format)) { + r600_is_sampler_format_supported(screen, format)) { retval |= PIPE_BIND_SAMPLER_VIEW; } @@ -438,9 +460,14 @@ static boolean r600_is_format_supported(struct pipe_screen* screen, retval |= PIPE_BIND_DEPTH_STENCIL; } - if ((usage & PIPE_BIND_VERTEX_BUFFER) && - r600_is_vertex_format_supported(format)) - retval |= PIPE_BIND_VERTEX_BUFFER; + if (usage & PIPE_BIND_VERTEX_BUFFER) { + struct r600_screen *rscreen = (struct r600_screen *)screen; + enum radeon_family family = r600_get_family(rscreen->radeon); + + if (r600_is_vertex_format_supported(format, family)) { + retval |= PIPE_BIND_VERTEX_BUFFER; + } + } if (usage & PIPE_BIND_TRANSFER_READ) retval |= PIPE_BIND_TRANSFER_READ; @@ -459,6 +486,8 @@ static void r600_destroy_screen(struct pipe_screen* pscreen) radeon_decref(rscreen->radeon); + util_slab_destroy(&rscreen->pool_buffers); + pipe_mutex_destroy(rscreen->mutex_num_contexts); FREE(rscreen); } @@ -485,6 +514,13 @@ struct pipe_screen *r600_screen_create(struct radeon *radeon) r600_init_screen_resource_functions(&rscreen->screen); rscreen->tiling_info = r600_get_tiling_info(radeon); + util_format_s3tc_init(); + + util_slab_create(&rscreen->pool_buffers, + sizeof(struct r600_resource_buffer), 64, + UTIL_SLAB_SINGLETHREADED); + + pipe_mutex_init(rscreen->mutex_num_contexts); return &rscreen->screen; } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 485f42166d..396801e4a4 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -30,12 +30,16 @@ #include <pipe/p_screen.h> #include <pipe/p_context.h> #include <util/u_math.h> -#include "translate/translate_cache.h" +#include "util/u_slab.h" +#include "util/u_vbuf_mgr.h" #include "r600.h" #include "r600_public.h" #include "r600_shader.h" #include "r600_resource.h" +#define R600_MAX_CONST_BUFFERS 1 +#define R600_MAX_CONST_BUFFER_SIZE 4096 + enum r600_pipe_state_id { R600_PIPE_STATE_BLEND = 0, R600_PIPE_STATE_BLEND_COLOR, @@ -62,6 +66,11 @@ struct r600_screen { struct pipe_screen screen; struct radeon *radeon; struct r600_tiling_info *tiling_info; + struct util_slab_mempool pool_buffers; + unsigned num_contexts; + + /* for thread-safe write accessing to num_contexts */ + pipe_mutex mutex_num_contexts; }; struct r600_pipe_sampler_view { @@ -86,9 +95,7 @@ struct r600_vertex_element { unsigned count; struct pipe_vertex_element elements[PIPE_MAX_ATTRIBS]; - enum pipe_format hw_format[PIPE_MAX_ATTRIBS]; - unsigned hw_format_size[PIPE_MAX_ATTRIBS]; - boolean incompatible_layout; + struct u_vbuf_mgr_elements *vmgr_elements; struct r600_bo *fetch_shader; unsigned fs_size; struct r600_pipe_state rstate; @@ -111,30 +118,18 @@ struct r600_pipe_shader { #define NUM_TEX_UNITS 16 struct r600_textures_info { - struct r600_pipe_sampler_view *views[NUM_TEX_UNITS]; + struct r600_pipe_sampler_view *views[NUM_TEX_UNITS]; unsigned n_views; void *samplers[NUM_TEX_UNITS]; unsigned n_samplers; }; -/* vertex buffer translation context, used to translate vertex input that - * hw doesn't natively support, so far only FLOAT64 is unsupported. - */ -struct r600_translate_context { - /* Translate cache for incompatible vertex offset/stride/format fallback. */ - struct translate_cache *translate_cache; - /* The vertex buffer slot containing the translated buffer. */ - unsigned vb_slot; - void *new_velems; -}; - #define R600_CONSTANT_ARRAY_SIZE 256 #define R600_RESOURCE_ARRAY_SIZE 160 struct r600_pipe_context { struct pipe_context context; struct blitter_context *blitter; - struct pipe_framebuffer_state *pframebuffer; unsigned family; void *custom_dsa_flush; struct r600_screen *screen; @@ -142,43 +137,35 @@ struct r600_pipe_context { struct r600_pipe_state *states[R600_PIPE_NSTATES]; struct r600_context ctx; struct r600_vertex_element *vertex_elements; + struct r600_pipe_state fs_resource[PIPE_MAX_ATTRIBS]; struct pipe_framebuffer_state framebuffer; struct pipe_index_buffer index_buffer; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - unsigned nvertex_buffer; unsigned cb_target_mask; /* for saving when using blitter */ struct pipe_stencil_ref stencil_ref; struct pipe_viewport_state viewport; struct pipe_clip_state clip; - unsigned nvs_resource; - struct r600_pipe_state *vs_resource; - struct r600_pipe_state *ps_resource; struct r600_pipe_state config; struct r600_pipe_shader *ps_shader; struct r600_pipe_shader *vs_shader; struct r600_pipe_state vs_const_buffer; + struct r600_pipe_state vs_const_buffer_resource[R600_MAX_CONST_BUFFERS]; struct r600_pipe_state ps_const_buffer; + struct r600_pipe_state ps_const_buffer_resource[R600_MAX_CONST_BUFFERS]; struct r600_pipe_rasterizer *rasterizer; /* shader information */ unsigned sprite_coord_enable; bool flatshade; - struct u_upload_mgr *upload_vb; - struct u_upload_mgr *upload_ib; - unsigned any_user_vbs; struct r600_textures_info ps_samplers; - unsigned vb_max_index; - struct r600_translate_context tran; + + struct u_vbuf_mgr *vbuf_mgr; + struct util_slab_mempool pool_transfers; + bool blit; }; struct r600_drawl { + struct pipe_draw_info info; struct pipe_context *ctx; - unsigned mode; - unsigned min_index; - unsigned max_index; - unsigned index_bias; - unsigned start; - unsigned count; unsigned index_size; unsigned index_buffer_offset; struct pipe_resource *index_buffer; @@ -187,16 +174,21 @@ struct r600_drawl { /* evergreen_state.c */ void evergreen_init_state_functions(struct r600_pipe_context *rctx); void evergreen_init_config(struct r600_pipe_context *rctx); -void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info); void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader); void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader); +void evergreen_fetch_shader(struct r600_vertex_element *ve); void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx); void evergreen_polygon_offset_update(struct r600_pipe_context *rctx); -void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx); +void evergreen_pipe_set_buffer_resource(struct r600_pipe_context *rctx, + struct r600_pipe_state *rstate, + struct r600_resource *rbuffer, + unsigned offset, unsigned stride); /* r600_blit.c */ void r600_init_blit_functions(struct r600_pipe_context *rctx); -int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture); +void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture); +void r600_blit_push_depth(struct pipe_context *ctx, struct r600_resource_texture *texture); +void r600_flush_depth_textures(struct r600_pipe_context *rctx); /* r600_buffer.c */ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, @@ -204,13 +196,9 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, void *ptr, unsigned bytes, unsigned bind); -unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, - struct pipe_resource *buf, - unsigned level, int layer); struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle); -int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw); -int r600_upload_user_buffers(struct r600_pipe_context *rctx); +void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw); /* r600_query.c */ void r600_init_query_functions(struct r600_pipe_context *rctx); @@ -219,7 +207,6 @@ void r600_init_query_functions(struct r600_pipe_context *rctx); void r600_init_context_resource_functions(struct r600_pipe_context *r600); /* r600_shader.c */ -int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader); int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens); void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader); int r600_find_vs_semantic_index(struct r600_shader *vs, @@ -227,11 +214,17 @@ int r600_find_vs_semantic_index(struct r600_shader *vs, /* r600_state.c */ void r600_init_state_functions(struct r600_pipe_context *rctx); -void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info); +void r600_spi_update(struct r600_pipe_context *rctx); void r600_init_config(struct r600_pipe_context *rctx); +void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader); +void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader); +void r600_fetch_shader(struct r600_vertex_element *ve); void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx); void r600_polygon_offset_update(struct r600_pipe_context *rctx); -void r600_vertex_buffer_update(struct r600_pipe_context *rctx); +void r600_pipe_set_buffer_resource(struct r600_pipe_context *rctx, + struct r600_pipe_state *rstate, + struct r600_resource *rbuffer, + unsigned offset, unsigned stride); /* r600_helper.h */ int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); @@ -239,15 +232,13 @@ int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); /* r600_texture.c */ void r600_init_screen_texture_functions(struct pipe_screen *screen); void r600_init_surface_functions(struct r600_pipe_context *r600); -uint32_t r600_translate_texformat(enum pipe_format format, - const unsigned char *swizzle_view, +uint32_t r600_translate_texformat(struct pipe_screen *screen, enum pipe_format format, + const unsigned char *swizzle_view, uint32_t *word4_p, uint32_t *yuv_format_p); unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, unsigned level, unsigned layer); /* r600_translate.c */ -void r600_begin_vertex_translate(struct r600_pipe_context *rctx); -void r600_end_vertex_translate(struct r600_pipe_context *rctx); void r600_translate_index_buffer(struct r600_pipe_context *r600, struct pipe_resource **index_buffer, unsigned *index_size, @@ -270,13 +261,16 @@ void r600_sampler_view_destroy(struct pipe_context *ctx, void r600_bind_state(struct pipe_context *ctx, void *state); void r600_delete_state(struct pipe_context *ctx, void *state); void r600_bind_vertex_elements(struct pipe_context *ctx, void *state); - void *r600_create_shader_state(struct pipe_context *ctx, const struct pipe_shader_state *state); void r600_bind_ps_shader(struct pipe_context *ctx, void *state); void r600_bind_vs_shader(struct pipe_context *ctx, void *state); void r600_delete_ps_shader(struct pipe_context *ctx, void *state); void r600_delete_vs_shader(struct pipe_context *ctx, void *state); +void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, + struct pipe_resource *buffer); +void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info); + /* * common helpers */ diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index 726668260c..181ea3f9e4 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -21,6 +21,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r600_pipe.h" +#include "r600d.h" static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type) { @@ -61,11 +62,35 @@ static boolean r600_get_query_result(struct pipe_context *ctx, struct r600_query *rquery = (struct r600_query *)query; if (rquery->num_results) { - ctx->flush(ctx, 0, NULL); + ctx->flush(ctx, NULL); } return r600_context_query_result(&rctx->ctx, (struct r600_query *)query, wait, vresult); } +static void r600_render_condition(struct pipe_context *ctx, + struct pipe_query *query, + uint mode) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_query *rquery = (struct r600_query *)query; + int wait_flag = 0; + + if (!query) { + rctx->ctx.predicate_drawing = false; + r600_query_predication(&rctx->ctx, NULL, PREDICATION_OP_CLEAR, 1); + return; + } + + if (mode == PIPE_RENDER_COND_WAIT || + mode == PIPE_RENDER_COND_BY_REGION_WAIT) { + wait_flag = 1; + } + + rctx->ctx.predicate_drawing = true; + r600_query_predication(&rctx->ctx, rquery, PREDICATION_OP_ZPASS, wait_flag); + +} + void r600_init_query_functions(struct r600_pipe_context *rctx) { rctx->context.create_query = r600_create_query; @@ -73,4 +98,7 @@ void r600_init_query_functions(struct r600_pipe_context *rctx) rctx->context.begin_query = r600_begin_query; rctx->context.end_query = r600_end_query; rctx->context.get_query_result = r600_get_query_result; + + if (r600_get_num_backends(rctx->screen->radeon) > 0) + rctx->context.render_condition = r600_render_condition; } diff --git a/src/gallium/drivers/r600/r600_resource.c b/src/gallium/drivers/r600/r600_resource.c index 207642ccfa..f3ab3613c8 100644 --- a/src/gallium/drivers/r600/r600_resource.c +++ b/src/gallium/drivers/r600/r600_resource.c @@ -61,5 +61,4 @@ void r600_init_context_resource_functions(struct r600_pipe_context *r600) r600->context.transfer_unmap = u_transfer_unmap_vtbl; r600->context.transfer_destroy = u_transfer_destroy_vtbl; r600->context.transfer_inline_write = u_transfer_inline_write_vtbl; - r600->context.is_resource_referenced = u_is_resource_referenced_vtbl; } diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 25aa84682c..836e7491f1 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -24,6 +24,7 @@ #define R600_RESOURCE_H #include "util/u_transfer.h" +#include "util/u_vbuf_mgr.h" /* flag to indicate a resource is to be used as a transfer so should not be tiled */ #define R600_RESOURCE_FLAG_TRANSFER PIPE_RESOURCE_FLAG_DRV_PRIV @@ -43,25 +44,45 @@ struct r600_transfer { * underlying implementations. */ struct r600_resource { - struct u_resource base; + struct u_vbuf_resource b; struct r600_bo *bo; u32 size; + unsigned bo_size; }; struct r600_resource_texture { struct r600_resource resource; unsigned offset[PIPE_MAX_TEXTURE_LEVELS]; - unsigned pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS]; - unsigned pitch_in_pixels[PIPE_MAX_TEXTURE_LEVELS]; + unsigned pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS]; /* transfer */ + unsigned pitch_in_blocks[PIPE_MAX_TEXTURE_LEVELS]; /* texture resource */ unsigned layer_size[PIPE_MAX_TEXTURE_LEVELS]; unsigned array_mode[PIPE_MAX_TEXTURE_LEVELS]; unsigned pitch_override; unsigned size; - unsigned tiled; unsigned tile_type; unsigned depth; - unsigned dirty; - struct r600_resource_texture *flushed_depth_texture; + unsigned dirty_db; + struct r600_resource_texture *flushed_depth_texture; + boolean is_flushing_texture; + + /* on some cards we have to use integer 64/128-bit types + for s3tc blits, do this until gallium grows int formats */ + boolean force_int_type; +}; + +#define R600_TEX_IS_TILED(tex, level) ((tex)->array_mode[level] != V_038000_ARRAY_LINEAR_GENERAL && (tex)->array_mode[level] != V_038000_ARRAY_LINEAR_ALIGNED) + +#define R600_BUFFER_MAGIC 0xabcd1600 + +/* XXX this could be removed */ +struct r600_resource_buffer { + struct r600_resource r; + uint32_t magic; +}; + +struct r600_surface { + struct pipe_surface base; + unsigned aligned_height; }; void r600_init_screen_resource_functions(struct pipe_screen *screen); @@ -73,41 +94,17 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, const struct pipe_resource *base, struct winsys_handle *whandle); -#define R600_BUFFER_MAGIC 0xabcd1600 -#define R600_BUFFER_MAX_RANGES 32 - -struct r600_buffer_range { - uint32_t start; - uint32_t end; -}; - -struct r600_resource_buffer { - struct r600_resource r; - uint32_t magic; - void *user_buffer; - struct r600_buffer_range ranges[R600_BUFFER_MAX_RANGES]; - unsigned num_ranges; -}; - /* r600_buffer */ static INLINE struct r600_resource_buffer *r600_buffer(struct pipe_resource *buffer) { if (buffer) { assert(((struct r600_resource_buffer *)buffer)->magic == R600_BUFFER_MAGIC); return (struct r600_resource_buffer *)buffer; - } - return NULL; -} - -static INLINE boolean r600_buffer_is_user_buffer(struct pipe_resource *buffer) -{ - return r600_buffer(buffer)->user_buffer ? TRUE : FALSE; + } + return NULL; } -int r600_texture_depth_flush(struct pipe_context *ctx, - struct pipe_resource *texture); - -extern int (*r600_blit_uncompress_depth_ptr)(struct pipe_context *ctx, struct r600_resource_texture *texture); +int r600_texture_depth_flush(struct pipe_context *ctx, struct pipe_resource *texture, boolean just_create); /* r600_texture.c texture transfer functions. */ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, @@ -122,9 +119,8 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, void r600_texture_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer* transfer); -struct r600_surface { - struct pipe_surface base; - unsigned aligned_height; -}; +struct r600_pipe_context; + +void r600_upload_const_buffer(struct r600_pipe_context *rctx, struct r600_resource_buffer **rbuffer, uint32_t *offset); #endif diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index d6455023a3..e7285d624e 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -28,60 +28,12 @@ #include "r600_pipe.h" #include "r600_asm.h" #include "r600_sq.h" +#include "r600_formats.h" #include "r600_opcodes.h" #include "r600d.h" #include <stdio.h> #include <errno.h> -static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) -{ - struct r600_pipe_state *rstate = &shader->rstate; - struct r600_shader *rshader = &shader->shader; - unsigned spi_vs_out_id[10]; - unsigned i, tmp; - - /* clear previous register */ - rstate->nregs = 0; - - /* so far never got proper semantic id from tgsi */ - /* FIXME better to move this in config things so they get emited - * only one time per cs - */ - for (i = 0; i < 10; i++) { - spi_vs_out_id[i] = 0; - } - for (i = 0; i < 32; i++) { - tmp = i << ((i & 3) * 8); - spi_vs_out_id[i / 4] |= tmp; - } - for (i = 0; i < 10; i++) { - r600_pipe_state_add_reg(rstate, - R_028614_SPI_VS_OUT_ID_0 + i * 4, - spi_vs_out_id[i], 0xFFFFFFFF, NULL); - } - - r600_pipe_state_add_reg(rstate, - R_0286C4_SPI_VS_OUT_CONFIG, - S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028868_SQ_PGM_RESOURCES_VS, - S_028868_NUM_GPRS(rshader->bc.ngpr) | - S_028868_STACK_SIZE(rshader->bc.nstack), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_0288D0_SQ_PGM_CF_OFFSET_VS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028858_SQ_PGM_START_VS, - r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); - - r600_pipe_state_add_reg(rstate, - R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, - 0xFFFFFFFF, NULL); - -} - int r600_find_vs_semantic_index(struct r600_shader *vs, struct r600_shader *ps, int id) { @@ -96,98 +48,7 @@ int r600_find_vs_semantic_index(struct r600_shader *vs, return 0; } -static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) -{ - struct r600_pipe_state *rstate = &shader->rstate; - struct r600_shader *rshader = &shader->shader; - unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; - int pos_index = -1, face_index = -1; - - rstate->nregs = 0; - - for (i = 0; i < rshader->ninput; i++) { - if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) - pos_index = i; - if (rshader->input[i].name == TGSI_SEMANTIC_FACE) - face_index = i; - } - - for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) - r600_pipe_state_add_reg(rstate, - R_02880C_DB_SHADER_CONTROL, - S_02880C_Z_EXPORT_ENABLE(1), - S_02880C_Z_EXPORT_ENABLE(1), NULL); - if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) - r600_pipe_state_add_reg(rstate, - R_02880C_DB_SHADER_CONTROL, - S_02880C_STENCIL_REF_EXPORT_ENABLE(1), - S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL); - } - - exports_ps = 0; - num_cout = 0; - for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL) - exports_ps |= 1; - else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { - num_cout++; - } - } - exports_ps |= S_028854_EXPORT_COLORS(num_cout); - if (!exports_ps) { - /* always at least export 1 component per pixel */ - exports_ps = 2; - } - - spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | - S_0286CC_PERSP_GRADIENT_ENA(1); - spi_input_z = 0; - if (pos_index != -1) { - spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) | - S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | - S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) | - S_0286CC_BARYC_SAMPLE_CNTL(1)); - spi_input_z |= 1; - } - - spi_ps_in_control_1 = 0; - if (face_index != -1) { - spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | - S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); - } - - r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028840_SQ_PGM_START_PS, - r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); - r600_pipe_state_add_reg(rstate, - R_028850_SQ_PGM_RESOURCES_PS, - S_028868_NUM_GPRS(rshader->bc.ngpr) | - S_028868_STACK_SIZE(rshader->bc.nstack), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028854_SQ_PGM_EXPORTS_PS, - exports_ps, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_0288CC_SQ_PGM_CF_OFFSET_PS, - 0x00000000, 0xFFFFFFFF, NULL); - - if (rshader->uses_kill) { - /* only set some bits here, the other bits are set in the dsa state */ - r600_pipe_state_add_reg(rstate, - R_02880C_DB_SHADER_CONTROL, - S_02880C_KILL_ENABLE(1), - S_02880C_KILL_ENABLE(1), NULL); - } - r600_pipe_state_add_reg(rstate, - R_03E200_SQ_LOOP_CONST_0, 0x01000FFF, - 0xFFFFFFFF, NULL); -} - -int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) +static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_shader *rshader = &shader->shader; @@ -225,14 +86,23 @@ int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) return 0; } -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); +static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); + int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) { + static int dump_shaders = -1; struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; int r; -//fprintf(stderr, "--------------------------------------------------------------\n"); -//tgsi_dump(tokens, 0); + /* Would like some magic "get_bool_option_once" routine. + */ + if (dump_shaders == -1) + dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); + + if (dump_shaders) { + fprintf(stderr, "--------------------------------------------------------------\n"); + tgsi_dump(tokens, 0); + } shader->shader.family = r600_get_family(rctx->radeon); r = r600_shader_from_tgsi(tokens, &shader->shader); if (r) { @@ -244,8 +114,10 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s R600_ERR("building bytecode failed !\n"); return r; } -//r600_bc_dump(&shader->shader.bc); -//fprintf(stderr, "______________________________________________________________\n"); + if (dump_shaders) { + r600_bc_dump(&shader->shader.bc); + fprintf(stderr, "______________________________________________________________\n"); + } return r600_pipe_shader(ctx, shader); } @@ -262,6 +134,15 @@ void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader */ struct r600_shader_tgsi_instruction; +struct r600_shader_src { + unsigned sel; + unsigned swizzle[4]; + unsigned neg; + unsigned abs; + unsigned rel; + uint32_t value[4]; +}; + struct r600_shader_ctx { struct tgsi_shader_info info; struct tgsi_parse_context parse; @@ -269,10 +150,11 @@ struct r600_shader_ctx { unsigned type; unsigned file_offset[TGSI_FILE_COUNT]; unsigned temp_reg; + unsigned ar_reg; struct r600_shader_tgsi_instruction *inst_info; struct r600_bc *bc; struct r600_shader *shader; - u32 value[4]; + struct r600_shader_src src[3]; u32 *literals; u32 nliterals; u32 max_driver_temp_used; @@ -391,6 +273,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) { struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; unsigned i; + int r; switch (d->Declaration.File) { case TGSI_FILE_INPUT: @@ -422,6 +305,26 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) case TGSI_FILE_SAMPLER: case TGSI_FILE_ADDRESS: break; + + case TGSI_FILE_SYSTEM_VALUE: + if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { + struct r600_bc_alu alu; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); + alu.src[0].sel = 0; + alu.src[0].chan = 3; + + alu.dst.sel = 0; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; + break; + } + default: R600_ERR("unsupported file %d declaration\n", d->Declaration.File); return -EINVAL; @@ -481,9 +384,187 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx) return ctx->num_interp_gpr; } -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) +static void tgsi_src(struct r600_shader_ctx *ctx, + const struct tgsi_full_src_register *tgsi_src, + struct r600_shader_src *r600_src) +{ + memset(r600_src, 0, sizeof(*r600_src)); + r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; + r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; + r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; + r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; + r600_src->neg = tgsi_src->Register.Negate; + r600_src->abs = tgsi_src->Register.Absolute; + + if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { + int index; + if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && + (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && + (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { + + index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; + r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); + if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) + return; + } + index = tgsi_src->Register.Index; + r600_src->sel = V_SQ_ALU_SRC_LITERAL; + memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); + } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { + /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */ + r600_src->swizzle[0] = 3; + r600_src->swizzle[1] = 3; + r600_src->swizzle[2] = 3; + r600_src->swizzle[3] = 3; + r600_src->sel = 0; + } else { + if (tgsi_src->Register.Indirect) + r600_src->rel = V_SQ_REL_RELATIVE; + r600_src->sel = tgsi_src->Register.Index; + r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; + } +} + +static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg) +{ + struct r600_bc_vtx vtx; + unsigned int ar_reg; + int r; + + if (offset) { + struct r600_bc_alu alu; + + memset(&alu, 0, sizeof(alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); + alu.src[0].sel = ctx->ar_reg; + + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = offset; + + alu.dst.sel = dst_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; + + ar_reg = dst_reg; + } else { + ar_reg = ctx->ar_reg; + } + + memset(&vtx, 0, sizeof(vtx)); + vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ + vtx.src_gpr = ar_reg; + vtx.mega_fetch_count = 16; + vtx.dst_gpr = dst_reg; + vtx.dst_sel_x = 0; /* SEL_X */ + vtx.dst_sel_y = 1; /* SEL_Y */ + vtx.dst_sel_z = 2; /* SEL_Z */ + vtx.dst_sel_w = 3; /* SEL_W */ + vtx.data_format = FMT_32_32_32_32_FLOAT; + vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ + vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ + vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ + + if ((r = r600_bc_add_vtx(ctx->bc, &vtx))) + return r; + + return 0; +} + +static int tgsi_split_constant(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, k, nconst, r; + + for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { + if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { + nconst++; + } + tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); + } + for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { + if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { + continue; + } + + if (ctx->src[i].rel) { + int treg = r600_get_temp(ctx); + if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg))) + return r; + + ctx->src[i].sel = treg; + ctx->src[i].rel = 0; + j--; + } else if (j > 0) { + int treg = r600_get_temp(ctx); + for (k = 0; k < 4; k++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.src[0].sel = ctx->src[i].sel; + alu.src[0].chan = k; + alu.src[0].rel = ctx->src[i].rel; + alu.dst.sel = treg; + alu.dst.chan = k; + alu.dst.write = 1; + if (k == 3) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + ctx->src[i].sel = treg; + ctx->src[i].rel =0; + j--; + } + } + return 0; +} + +/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ +static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, k, nliteral, r; + + for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { + if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { + nliteral++; + } + } + for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { + if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { + int treg = r600_get_temp(ctx); + for (k = 0; k < 4; k++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.src[0].sel = ctx->src[i].sel; + alu.src[0].chan = k; + alu.src[0].value = ctx->src[i].value[k]; + alu.dst.sel = treg; + alu.dst.chan = k; + alu.dst.write = 1; + if (k == 3) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + ctx->src[i].sel = treg; + j--; + } + } + return 0; +} + +static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) { struct tgsi_full_immediate *immediate; + struct tgsi_full_property *property; struct r600_shader_ctx ctx; struct r600_bc_output output[32]; unsigned output_done, noutput; @@ -506,7 +587,9 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s /* Values [0,127] correspond to GPR[0..127]. * Values [128,159] correspond to constant buffer bank 0 * Values [160,191] correspond to constant buffer bank 1 - * Values [256,511] correspond to cfile constants c[0..255]. + * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) + * Values [256,287] correspond to constant buffer bank 2 (EG) + * Values [288,319] correspond to constant buffer bank 3 (EG) * Other special values are shown in the list below. * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) @@ -540,15 +623,18 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + ctx.info.file_count[TGSI_FILE_OUTPUT]; - ctx.file_offset[TGSI_FILE_CONSTANT] = 128; + /* Outside the GPR range. This will be translated to one of the + * kcache banks later. */ + ctx.file_offset[TGSI_FILE_CONSTANT] = 512; - ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253; - ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + + ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; + ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + ctx.info.file_count[TGSI_FILE_TEMPORARY]; + ctx.temp_reg = ctx.ar_reg + 1; ctx.nliterals = 0; ctx.literals = NULL; - + shader->fs_write_all = FALSE; while (!tgsi_parse_end_of_tokens(&ctx.parse)) { tgsi_parse_token(&ctx.parse); switch (ctx.parse.FullToken.Token.Type) { @@ -577,7 +663,12 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s ctx.max_driver_temp_used = 0; /* reserve first tmp for everyone */ r600_get_temp(&ctx); + opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; + if ((r = tgsi_split_constant(&ctx))) + goto out_err; + if ((r = tgsi_split_literal_constant(&ctx))) + goto out_err; if (ctx.bc->chiprev == CHIPREV_EVERGREEN) ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; else @@ -585,9 +676,13 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s r = ctx.inst_info->process(&ctx); if (r) goto out_err; - r = r600_bc_add_literal(ctx.bc, ctx.value); - if (r) - goto out_err; + break; + case TGSI_TOKEN_TYPE_PROPERTY: + property = &ctx.parse.FullToken.FullProperty; + if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) { + if (property->u[0].Data == 1) + shader->fs_write_all = TRUE; + } break; default: R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); @@ -605,6 +700,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[i].swizzle_y = 1; output[i].swizzle_z = 2; output[i].swizzle_w = 3; + output[i].burst_count = 1; output[i].barrier = 1; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; output[i].array_base = i - pos0; @@ -668,6 +764,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[i].swizzle_y = 1; output[i].swizzle_z = 2; output[i].swizzle_w = 3; + output[i].burst_count = 1; output[i].barrier = 1; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; output[i].array_base = 0; @@ -684,6 +781,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[0].swizzle_y = 7; output[0].swizzle_z = 7; output[0].swizzle_w = 7; + output[0].burst_count = 1; output[0].barrier = 1; output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; output[0].array_base = 0; @@ -726,33 +824,22 @@ static int tgsi_end(struct r600_shader_ctx *ctx) return 0; } -static int tgsi_src(struct r600_shader_ctx *ctx, - const struct tgsi_full_src_register *tgsi_src, - struct r600_bc_alu_src *r600_src) +static void r600_bc_src(struct r600_bc_alu_src *bc_src, + const struct r600_shader_src *shader_src, + unsigned chan) { - int index; - memset(r600_src, 0, sizeof(struct r600_bc_alu_src)); - r600_src->sel = tgsi_src->Register.Index; - if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { - r600_src->sel = 0; - index = tgsi_src->Register.Index; - ctx->value[0] = ctx->literals[index * 4 + 0]; - ctx->value[1] = ctx->literals[index * 4 + 1]; - ctx->value[2] = ctx->literals[index * 4 + 2]; - ctx->value[3] = ctx->literals[index * 4 + 3]; - } - if (tgsi_src->Register.Indirect) - r600_src->rel = V_SQ_REL_RELATIVE; - r600_src->neg = tgsi_src->Register.Negate; - r600_src->abs = tgsi_src->Register.Absolute; - r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; - return 0; + bc_src->sel = shader_src->sel; + bc_src->chan = shader_src->swizzle[chan]; + bc_src->neg = shader_src->neg; + bc_src->abs = shader_src->abs; + bc_src->rel = shader_src->rel; + bc_src->value = shader_src->value[bc_src->chan]; } -static int tgsi_dst(struct r600_shader_ctx *ctx, - const struct tgsi_full_dst_register *tgsi_dst, - unsigned swizzle, - struct r600_bc_alu_dst *r600_dst) +static void tgsi_dst(struct r600_shader_ctx *ctx, + const struct tgsi_full_dst_register *tgsi_dst, + unsigned swizzle, + struct r600_bc_alu_dst *r600_dst) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -765,146 +852,42 @@ static int tgsi_dst(struct r600_shader_ctx *ctx, if (inst->Instruction.Saturate) { r600_dst->clamp = 1; } - return 0; } -static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle) +static int tgsi_last_instruction(unsigned writemask) { - switch (swizzle) { - case 0: - return tgsi_src->Register.SwizzleX; - case 1: - return tgsi_src->Register.SwizzleY; - case 2: - return tgsi_src->Register.SwizzleZ; - case 3: - return tgsi_src->Register.SwizzleW; - default: - return 0; - } -} + int i, lasti = 0; -static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) -{ - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; - int i, j, k, nconst, r; - - for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { - if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { - nconst++; - } - r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]); - if (r) { - return r; - } - } - for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { - if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { - int treg = r600_get_temp(ctx); - for (k = 0; k < 4; k++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = r600_src[i].sel; - alu.src[0].chan = k; - alu.src[0].rel = r600_src[i].rel; - alu.dst.sel = treg; - alu.dst.chan = k; - alu.dst.write = 1; - if (k == 3) - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - } - r600_src[i].sel = treg; - r600_src[i].rel =0; - j--; - } - } - return 0; -} - -/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ -static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) -{ - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; - int i, j, k, nliteral, r; - - for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { - if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { - nliteral++; - } - } - for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { - if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { - int treg = r600_get_temp(ctx); - for (k = 0; k < 4; k++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = r600_src[i].sel; - alu.src[0].chan = k; - alu.dst.sel = treg; - alu.dst.chan = k; - alu.dst.write = 1; - if (k == 3) - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - } - r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]); - if (r) - return r; - r600_src[i].sel = treg; - j--; + for (i = 0; i < 4; i++) { + if (writemask & (1 << i)) { + lasti = i; } } - return 0; + return lasti; } static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; - int lasti = 0; + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - for (i = 0; i < 4; i++) { - if (inst->Dst[0].Register.WriteMask & (1 << i)) { - lasti = i; - } - } - - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; memset(&alu, 0, sizeof(struct r600_bc_alu)); - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.inst = ctx->inst_info->r600_opcode; if (!swap) { for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - alu.src[j] = r600_src[j]; - alu.src[j].chan = tgsi_chan(&inst->Src[j], i); + r600_bc_src(&alu.src[j], &ctx->src[j], i); } } else { - alu.src[0] = r600_src[1]; - alu.src[0].chan = tgsi_chan(&inst->Src[1], i); - - alu.src[1] = r600_src[0]; - alu.src[1].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[0], &ctx->src[1], i); + r600_bc_src(&alu.src[1], &ctx->src[0], i); } /* handle some special cases */ switch (ctx->inst_info->tgsi_opcode) { @@ -942,25 +925,15 @@ static int tgsi_op2_swap(struct r600_shader_ctx *ctx) * r700 - normalize by dividing by 2PI * see fdo bug 27901 */ -static int tgsi_setup_trig(struct r600_shader_ctx *ctx, - struct r600_bc_alu_src r600_src[3]) +static int tgsi_setup_trig(struct r600_shader_ctx *ctx) { - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + static float half_inv_pi = 1.0 /(3.1415926535 * 2); + static float double_pi = 3.1415926535 * 2; + static float neg_pi = -3.1415926535; + int r; - uint32_t lit_vals[4]; struct r600_bc_alu alu; - memset(lit_vals, 0, 4*4); - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; - - lit_vals[0] = fui(1.0 /(3.1415926535 * 2)); - lit_vals[1] = fui(0.5f); - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; @@ -969,20 +942,17 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; alu.src[1].chan = 0; - alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; - alu.src[2].chan = 1; + alu.src[1].value = *(uint32_t *)&half_inv_pi; + alu.src[2].sel = V_SQ_ALU_SRC_0_5; + alu.src[2].chan = 0; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, lit_vals); - if (r) - return r; memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); @@ -998,14 +968,6 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, if (r) return r; - if (ctx->bc->chiprev == CHIPREV_R600) { - lit_vals[0] = fui(3.1415926535897f * 2.0f); - lit_vals[1] = fui(-3.1415926535897f); - } else { - lit_vals[0] = fui(1.0f); - lit_vals[1] = fui(-0.5f); - } - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; @@ -1020,26 +982,32 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; alu.src[1].chan = 0; alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; - alu.src[2].chan = 1; + alu.src[2].chan = 0; + + if (ctx->bc->chiprev == CHIPREV_R600) { + alu.src[1].value = *(uint32_t *)&double_pi; + alu.src[2].value = *(uint32_t *)&neg_pi; + } else { + alu.src[1].sel = V_SQ_ALU_SRC_1; + alu.src[2].sel = V_SQ_ALU_SRC_0_5; + alu.src[2].neg = 1; + } + alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, lit_vals); - if (r) - return r; return 0; } static int tgsi_trig(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, r; - int lasti = 0; + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - r = tgsi_setup_trig(ctx, r600_src); + r = tgsi_setup_trig(ctx); if (r) return r; @@ -1057,10 +1025,6 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) return r; /* replicate result */ - for (i = 0; i < 4; i++) { - if (inst->Dst[0].Register.WriteMask & (1 << i)) - lasti = i; - } for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; @@ -1069,9 +1033,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = ctx->temp_reg; - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (i == lasti) alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); @@ -1084,7 +1046,6 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) static int tgsi_scs(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int r; @@ -1092,7 +1053,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) * X or Y components of the destination vector. */ if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { - r = tgsi_setup_trig(ctx, r600_src); + r = tgsi_setup_trig(ctx); if (r) return r; } @@ -1101,9 +1062,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); - r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -1117,9 +1076,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); - r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -1135,9 +1092,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); alu.src[0].sel = V_SQ_ALU_SRC_0; alu.src[0].chan = 0; @@ -1147,10 +1102,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* dst.w = 1.0; */ @@ -1159,9 +1110,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; @@ -1171,10 +1120,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } return 0; @@ -1182,7 +1127,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) static int tgsi_kill(struct r600_shader_ctx *ctx) { - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int i, r; @@ -1198,10 +1142,7 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) alu.src[1].sel = V_SQ_ALU_SRC_1; alu.src[1].neg = 1; } else { - r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); - if (r) - return r; - alu.src[1].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[1], &ctx->src[0], i); } if (i == 3) { alu.last = 1; @@ -1210,9 +1151,6 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) if (r) return r; } - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* kill must be last in ALU */ ctx->bc->force_add_cf = 1; @@ -1224,24 +1162,14 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; - struct r600_bc_alu_src r600_src[3]; int r; - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; - /* dst.x, <- 1.0 */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ alu.src[0].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) @@ -1250,12 +1178,10 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* dst.y = max(src.x, 0.0) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); - alu.src[0] = r600_src[0]; + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ alu.src[1].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) @@ -1266,19 +1192,13 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - if (inst->Dst[0].Register.WriteMask & (1 << 2)) { int chan; @@ -1287,33 +1207,24 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* dst.z = log(src.y) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); - r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); - if (r) - return r; + r600_bc_src(&alu.src[0], &ctx->src[0], 1); + tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - chan = alu.dst.chan; sel = alu.dst.sel; /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); + r600_bc_src(&alu.src[0], &ctx->src[0], 3); alu.src[1].sel = sel; alu.src[1].chan = chan; - alu.src[2] = r600_src[0]; - alu.src[2].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[2], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; alu.dst.write = 1; @@ -1323,17 +1234,12 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* dst.z = exp(tmp.x) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) @@ -1357,10 +1263,7 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); - if (r) - return r; - alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); + r600_bc_src(&alu.src[i], &ctx->src[i], 0); alu.src[i].abs = 1; } alu.dst.sel = ctx->temp_reg; @@ -1369,9 +1272,6 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* replicate result */ return tgsi_helper_tempx_replicate(ctx); } @@ -1387,9 +1287,7 @@ static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) alu.src[0].sel = ctx->temp_reg; alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.dst.chan = i; - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; if (i == 3) alu.last = 1; @@ -1409,10 +1307,7 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); - if (r) - return r; - alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); + r600_bc_src(&alu.src[i], &ctx->src[i], 0); } alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -1420,42 +1315,29 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* replicate result */ return tgsi_helper_tempx_replicate(ctx); } static int tgsi_pow(struct r600_shader_ctx *ctx) { - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int r; /* LOG2(a) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc,ctx->value); - if (r) - return r; /* b * LOG2(a) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE); - r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[1], 0); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); + r600_bc_src(&alu.src[0], &ctx->src[1], 0); alu.src[1].sel = ctx->temp_reg; alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -1463,9 +1345,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc,ctx->value); - if (r) - return r; /* POW(a,b) = EXP2(b * LOG2(a))*/ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); @@ -1476,9 +1355,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc,ctx->value); - if (r) - return r; return tgsi_helper_tempx_replicate(ctx); } @@ -1486,16 +1362,8 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; - struct r600_bc_alu_src r600_src[3]; int i, r; - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; - /* tmp = (src > 0 ? 1 : src) */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -1505,31 +1373,23 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], i); - + r600_bc_src(&alu.src[0], &ctx->src[0], i); alu.src[1].sel = V_SQ_ALU_SRC_1; + r600_bc_src(&alu.src[2], &ctx->src[0], i); - alu.src[2] = r600_src[0]; - alu.src[2].chan = tgsi_chan(&inst->Src[0], i); if (i == 3) alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; } - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* dst = (-tmp > 0 ? -1 : tmp) */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); alu.is_op3 = 1; - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = i; @@ -1555,9 +1415,6 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru struct r600_bc_alu alu; int i, r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { @@ -1565,9 +1422,7 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru alu.dst.chan = i; } else { alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = i; } @@ -1584,61 +1439,50 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru static int tgsi_op3(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); + + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; - /* do it in 2 step as op3 doesn't support writemask */ - for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - alu.src[j] = r600_src[j]; - alu.src[j].chan = tgsi_chan(&inst->Src[j], i); + r600_bc_src(&alu.src[j], &ctx->src[j], i); } - alu.dst.sel = ctx->temp_reg; + + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; alu.dst.write = 1; alu.is_op3 = 1; - if (i == 3) { + if (i == lasti) { alu.last = 1; } r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; } - return tgsi_helper_copy(ctx, inst); + return 0; } static int tgsi_dp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - alu.src[j] = r600_src[j]; - alu.src[j].chan = tgsi_chan(&inst->Src[j], i); + r600_bc_src(&alu.src[j], &ctx->src[j], i); } - alu.dst.sel = ctx->temp_reg; + + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; - alu.dst.write = 1; + alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; /* handle some special cases */ switch (ctx->inst_info->tgsi_opcode) { case TGSI_OPCODE_DP2: @@ -1670,19 +1514,21 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) if (r) return r; } - return tgsi_helper_copy(ctx, inst); + return 0; } static int tgsi_tex(struct r600_shader_ctx *ctx) { + static float one_point_five = 1.5f; struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_tex tex; struct r600_bc_alu alu; unsigned src_gpr; int r, i; int opcode; - boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY; - uint32_t lit_vals[4]; + boolean src_not_temp = + inst->Src[0].Register.File != TGSI_FILE_TEMPORARY && + inst->Src[0].Register.File != TGSI_FILE_INPUT; src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; @@ -1690,11 +1536,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) /* Add perspective divide */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; + r600_bc_src(&alu.src[0], &ctx->src[0], 3); - alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 3; alu.last = 1; @@ -1708,10 +1551,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 3; - r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); - if (r) - return r; - alu.src[1].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[1], &ctx->src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; alu.dst.write = 1; @@ -1735,43 +1575,15 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { - int src_chan, src2_chan; + static const unsigned src0_swizzle[] = {2, 2, 0, 1}; + static const unsigned src1_swizzle[] = {1, 0, 2, 2}; /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); - switch (i) { - case 0: - src_chan = 2; - src2_chan = 1; - break; - case 1: - src_chan = 2; - src2_chan = 0; - break; - case 2: - src_chan = 0; - src2_chan = 2; - break; - case 3: - src_chan = 1; - src2_chan = 2; - break; - default: - assert(0); - src_chan = 0; - src2_chan = 0; - break; - } - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); - if (r) - return r; - alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan); + r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); + r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) @@ -1811,6 +1623,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 0; + alu.src[2].value = *(uint32_t *)&one_point_five; alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -1831,6 +1644,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 0; + alu.src[2].value = *(uint32_t *)&one_point_five; alu.dst.sel = ctx->temp_reg; alu.dst.chan = 1; @@ -1841,11 +1655,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (r) return r; - lit_vals[0] = fui(1.5f); - - r = r600_bc_add_literal(ctx->bc, lit_vals); - if (r) - return r; src_not_temp = FALSE; src_gpr = ctx->temp_reg; } @@ -1854,8 +1663,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = src_gpr; - alu.src[0].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[0], &ctx->src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) @@ -1876,7 +1684,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) memset(&tex, 0, sizeof(struct r600_bc_tex)); tex.inst = opcode; tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; - tex.resource_id = tex.sampler_id; + tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; tex.src_gpr = src_gpr; tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; @@ -1902,6 +1710,12 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.coord_type_w = 1; } + if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) { + tex.coord_type_z = 0; + tex.src_sel_z = 1; + } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY) + tex.coord_type_z = 0; + if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) tex.src_sel_w = 2; @@ -1916,29 +1730,48 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) static int tgsi_lrp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); unsigned i; int r; - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; + /* optimize if it's just an equal balance */ + if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); + r600_bc_src(&alu.src[0], &ctx->src[1], i); + r600_bc_src(&alu.src[1], &ctx->src[2], i); + alu.omod = 3; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + alu.dst.chan = i; + if (i == lasti) { + alu.last = 1; + } + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; + } + /* 1 - src0 */ - for (i = 0; i < 4; i++) { + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; - alu.src[1] = r600_src[0]; - alu.src[1].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[1], &ctx->src[0], i); alu.src[1].neg = 1; alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; - if (i == 3) { + if (i == lasti) { alu.last = 1; } alu.dst.write = 1; @@ -1946,21 +1779,20 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) if (r) return r; } - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* (1 - src0) * src2 */ - for (i = 0; i < 4; i++) { + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = i; - alu.src[1] = r600_src[2]; - alu.src[1].chan = tgsi_chan(&inst->Src[2], i); + r600_bc_src(&alu.src[1], &ctx->src[2], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; - if (i == 3) { + if (i == lasti) { alu.last = 1; } alu.dst.write = 1; @@ -1968,88 +1800,66 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) if (r) return r; } - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* src0 * src1 + (1 - src0) * src2 */ - for (i = 0; i < 4; i++) { + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], i); - alu.src[1] = r600_src[1]; - alu.src[1].chan = tgsi_chan(&inst->Src[1], i); + r600_bc_src(&alu.src[0], &ctx->src[0], i); + r600_bc_src(&alu.src[1], &ctx->src[1], i); alu.src[2].sel = ctx->temp_reg; alu.src[2].chan = i; - alu.dst.sel = ctx->temp_reg; + + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; - if (i == 3) { + if (i == lasti) { alu.last = 1; } r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; } - return tgsi_helper_copy(ctx, inst); + return 0; } static int tgsi_cmp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; - int use_temp = 0; int i, r; + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; - - if (inst->Dst[0].Register.WriteMask != 0xf) - use_temp = 1; + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; - for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], i); - - alu.src[1] = r600_src[2]; - alu.src[1].chan = tgsi_chan(&inst->Src[2], i); - - alu.src[2] = r600_src[1]; - alu.src[2].chan = tgsi_chan(&inst->Src[1], i); - - if (use_temp) - alu.dst.sel = ctx->temp_reg; - else { - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - } + r600_bc_src(&alu.src[0], &ctx->src[0], i); + r600_bc_src(&alu.src[1], &ctx->src[2], i); + r600_bc_src(&alu.src[2], &ctx->src[1], i); + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; alu.dst.write = 1; alu.is_op3 = 1; - if (i == 3) + if (i == lasti) alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; } - if (use_temp) - return tgsi_helper_copy(ctx, inst); return 0; } static int tgsi_xpd(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; + static const unsigned int src0_swizzle[] = {2, 0, 1}; + static const unsigned int src1_swizzle[] = {1, 2, 0}; struct r600_bc_alu alu; uint32_t use_temp = 0; int i, r; @@ -2057,45 +1867,15 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask != 0xf) use_temp = 1; - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; - for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - - alu.src[0] = r600_src[0]; - switch (i) { - case 0: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); - break; - case 1: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); - break; - case 2: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); - break; - case 3: + if (i < 3) { + r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); + r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); + } else { alu.src[0].sel = V_SQ_ALU_SRC_0; alu.src[0].chan = i; - } - - alu.src[1] = r600_src[1]; - switch (i) { - case 0: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); - break; - case 1: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); - break; - case 2: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); - break; - case 3: alu.src[1].sel = V_SQ_ALU_SRC_0; alu.src[1].chan = i; } @@ -2109,44 +1889,18 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); - alu.src[0] = r600_src[0]; - switch (i) { - case 0: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); - break; - case 1: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); - break; - case 2: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); - break; - case 3: + if (i < 3) { + r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); + r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); + } else { alu.src[0].sel = V_SQ_ALU_SRC_0; alu.src[0].chan = i; - } - - alu.src[1] = r600_src[1]; - switch (i) { - case 0: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); - break; - case 1: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); - break; - case 2: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); - break; - case 3: alu.src[1].sel = V_SQ_ALU_SRC_0; alu.src[1].chan = i; } @@ -2157,11 +1911,8 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) if (use_temp) alu.dst.sel = ctx->temp_reg; - else { - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - } + else + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; alu.dst.write = 1; alu.is_op3 = 1; @@ -2170,10 +1921,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } if (use_temp) return tgsi_helper_copy(ctx, inst); @@ -2183,7 +1930,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) static int tgsi_exp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3] = { { 0 } }; struct r600_bc_alu alu; int r; @@ -2192,11 +1938,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -2206,10 +1948,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -2221,10 +1959,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* result.y = tmp - floor(tmp); */ @@ -2232,11 +1966,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); - alu.src[0] = r600_src[0]; - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; // r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -2250,19 +1980,13 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* result.z = RoughApprox2ToX(tmp);*/ if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -2273,9 +1997,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* result.w = 1.0;*/ @@ -2293,9 +2014,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } return tgsi_helper_copy(ctx, inst); } @@ -2311,11 +2029,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -2325,10 +2039,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -2341,10 +2051,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* result.y = src.x / (2 ^ floor(log2(src.x))); */ @@ -2352,11 +2058,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 1; @@ -2367,10 +2069,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); @@ -2386,10 +2084,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); @@ -2405,10 +2099,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); @@ -2424,19 +2114,11 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = ctx->temp_reg; alu.src[1].chan = 1; @@ -2449,10 +2131,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* result.z = log2(src);*/ @@ -2460,11 +2138,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -2474,10 +2148,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* result.w = 1.0; */ @@ -2496,10 +2166,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } return tgsi_helper_copy(ctx, inst); @@ -2510,6 +2176,7 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int r; + memset(&alu, 0, sizeof(struct r600_bc_alu)); switch (inst->Instruction.Opcode) { @@ -2524,26 +2191,26 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx) return -1; } - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.last = 1; - alu.dst.chan = 0; - alu.dst.sel = ctx->temp_reg; + alu.dst.sel = ctx->ar_reg; alu.dst.write = 1; - r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); + r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; + + /* TODO: Note that the MOVA can be avoided if we never use AR for + * indexing non-CB registers in the current ALU clause. Similarly, we + * need to load AR from ar_reg again if we started a new clause + * between ARL and AR usage. The easy way to do that is to remove + * the MOVA here, and load it for the first AR access after ar_reg + * has been modified in each clause. */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].sel = ctx->temp_reg; + alu.src[0].sel = ctx->ar_reg; alu.src[0].chan = 0; alu.last = 1; - r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); + r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; return 0; @@ -2554,29 +2221,51 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR; + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + alu.dst.sel = ctx->ar_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; + + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; + alu.src[0].sel = ctx->ar_reg; + alu.dst.sel = ctx->ar_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; break; case TGSI_OPCODE_ARR: - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA; + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + alu.dst.sel = ctx->ar_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; break; default: assert(0); return -1; } - - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); - + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; + alu.src[0].sel = ctx->ar_reg; alu.last = 1; - r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); + r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; ctx->bc->cf_last->r6xx_uses_waterfall = 1; @@ -2593,26 +2282,18 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (i == 0 || i == 3) { alu.src[0].sel = V_SQ_ALU_SRC_1; } else { - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[0], &ctx->src[0], i); } - if (i == 0 || i == 2) { + if (i == 0 || i == 2) { alu.src[1].sel = V_SQ_ALU_SRC_1; } else { - r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]); - if (r) - return r; - alu.src[1].chan = tgsi_chan(&inst->Src[1], i); + r600_bc_src(&alu.src[1], &ctx->src[1], i); } if (i == 3) alu.last = 1; @@ -2625,7 +2306,6 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx) static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) { - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int r; @@ -2637,10 +2317,7 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) alu.dst.write = 1; alu.dst.chan = 0; - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_0; alu.src[1].chan = 0; @@ -2654,9 +2331,25 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) static int pops(struct r600_shader_ctx *ctx, int pops) { - r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); - ctx->bc->cf_last->pop_count = pops; - ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; + int alu_pop = 3; + if (ctx->bc->cf_last) { + if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3) + alu_pop = 0; + else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3) + alu_pop = 1; + } + alu_pop += pops; + if (alu_pop == 1) { + ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3; + ctx->bc->force_add_cf = 1; + } else if (alu_pop == 2) { + ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3; + ctx->bc->force_add_cf = 1; + } else { + r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); + ctx->bc->cf_last->pop_count = pops; + ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; + } return 0; } @@ -3002,7 +2695,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, + {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* gap */ {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, @@ -3075,7 +2768,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, - {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, @@ -3160,7 +2853,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, + {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* gap */ {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 35b0331525..8f96ce5085 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -45,8 +45,7 @@ struct r600_shader { struct r600_shader_io output[32]; enum radeon_family family; boolean uses_kill; + boolean fs_write_all; }; -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); - #endif diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h index 0573e63dc8..56ed35e8b3 100644 --- a/src/gallium/drivers/r600/r600_sq.h +++ b/src/gallium/drivers/r600/r600_sq.h @@ -74,6 +74,10 @@ #define S_SQ_CF_ALU_WORD0_KCACHE_MODE0(x) (((x) & 0x3) << 30) #define G_SQ_CF_ALU_WORD0_KCACHE_MODE0(x) (((x) >> 30) & 0x3) #define C_SQ_CF_ALU_WORD0_KCACHE_MODE0 0x3FFFFFFF +#define V_SQ_CF_KCACHE_NOP 0x00000000 +#define V_SQ_CF_KCACHE_LOCK_1 0x00000001 +#define V_SQ_CF_KCACHE_LOCK_2 0x00000002 +#define V_SQ_CF_KCACHE_LOCK_LOOP_INDEX 0x00000003 #define P_SQ_CF_ALU_WORD1 #define S_SQ_CF_ALU_WORD1_KCACHE_MODE1(x) (((x) & 0x3) << 0) #define G_SQ_CF_ALU_WORD1_KCACHE_MODE1(x) (((x) >> 0) & 0x3) @@ -187,6 +191,8 @@ #define V_SQ_ALU_SRC_M_1_INT 0x000000FB #define V_SQ_ALU_SRC_0_5 0x000000FC #define V_SQ_ALU_SRC_LITERAL 0x000000FD +#define V_SQ_ALU_SRC_PV 0x000000FE +#define V_SQ_ALU_SRC_PS 0x000000FF #define V_SQ_ALU_SRC_PARAM_BASE 0x000001C0 #define S_SQ_ALU_WORD0_SRC0_REL(x) (((x) & 0x1) << 9) #define G_SQ_ALU_WORD0_SRC0_REL(x) (((x) >> 9) & 0x1) diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 553d786d65..d3adf0393c 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -36,8 +36,8 @@ #include <util/u_pack_color.h> #include <util/u_memory.h> #include <util/u_inlines.h> -#include <util/u_upload_mgr.h> #include <util/u_framebuffer.h> +#include "util/u_transfer.h" #include <pipebuffer/pb_buffer.h> #include "r600.h" #include "r600d.h" @@ -95,230 +95,6 @@ void r600_polygon_offset_update(struct r600_pipe_context *rctx) } } -/* FIXME optimize away spi update when it's not needed */ -static void r600_spi_update(struct r600_pipe_context *rctx) -{ - struct r600_pipe_shader *shader = rctx->ps_shader; - struct r600_pipe_state rstate; - struct r600_shader *rshader = &shader->shader; - unsigned i, tmp; - - rstate.nregs = 0; - for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); - if (rshader->input[i].centroid) - tmp |= S_028644_SEL_CENTROID(1); - if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) - tmp |= S_028644_SEL_LINEAR(1); - - if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || - rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || - rshader->input[i].name == TGSI_SEMANTIC_POSITION) { - tmp |= S_028644_FLAT_SHADE(rctx->flatshade); - } - if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && - rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { - tmp |= S_028644_PT_SPRITE_TEX(1); - } - r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); - } - r600_context_pipe_state_set(&rctx->ctx, &rstate); -} - -void r600_vertex_buffer_update(struct r600_pipe_context *rctx) -{ - struct r600_pipe_state *rstate; - struct r600_resource *rbuffer; - struct pipe_vertex_buffer *vertex_buffer; - unsigned i, offset; - - /* we don't update until we know vertex elements */ - if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer) - return; - - /* delete previous translated vertex elements */ - if (rctx->tran.new_velems) { - r600_end_vertex_translate(rctx); - } - - if (rctx->vertex_elements->incompatible_layout) { - /* translate rebind new vertex elements so - * return once translated - */ - r600_begin_vertex_translate(rctx); - return; - } - - if (rctx->any_user_vbs) { - r600_upload_user_buffers(rctx); - rctx->any_user_vbs = FALSE; - } - - if (rctx->vertex_elements->vbuffer_need_offset) { - /* one resource per vertex elements */ - rctx->nvs_resource = rctx->vertex_elements->count; - } else { - /* bind vertex buffer once */ - rctx->nvs_resource = rctx->nvertex_buffer; - } - - for (i = 0 ; i < rctx->nvs_resource; i++) { - rstate = &rctx->vs_resource[i]; - rstate->id = R600_PIPE_STATE_RESOURCE; - rstate->nregs = 0; - - if (rctx->vertex_elements->vbuffer_need_offset) { - /* one resource per vertex elements */ - unsigned vbuffer_index; - vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index; - vertex_buffer = &rctx->vertex_buffer[vbuffer_index]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = rctx->vertex_elements->vbuffer_offset[i] + - vertex_buffer->buffer_offset + - r600_bo_offset(rbuffer->bo); - } else { - /* bind vertex buffer once */ - vertex_buffer = &rctx->vertex_buffer[i]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = vertex_buffer->buffer_offset + - r600_bo_offset(rbuffer->bo); - } - - r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, - offset, 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, - rbuffer->size - offset - 1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, - S_038008_STRIDE(vertex_buffer->stride), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, - 0xC0000000, 0xFFFFFFFF, NULL); - r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); - } -} - -static void r600_draw_common(struct r600_drawl *draw) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)draw->ctx; - struct r600_resource *rbuffer; - unsigned prim; - u32 vgt_dma_index_type, vgt_draw_initiator, mask; - struct r600_draw rdraw; - struct r600_pipe_state vgt; - - switch (draw->index_size) { - case 2: - vgt_draw_initiator = 0; - vgt_dma_index_type = 0; - break; - case 4: - vgt_draw_initiator = 0; - vgt_dma_index_type = 1; - break; - case 0: - vgt_draw_initiator = 2; - vgt_dma_index_type = 0; - break; - default: - R600_ERR("unsupported index size %d\n", draw->index_size); - return; - } - if (r600_conv_pipe_prim(draw->mode, &prim)) - return; - if (unlikely(rctx->ps_shader == NULL)) { - R600_ERR("missing vertex shader\n"); - return; - } - if (unlikely(rctx->vs_shader == NULL)) { - R600_ERR("missing vertex shader\n"); - return; - } - /* there should be enough input */ - if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) { - R600_ERR("%d resources provided, expecting %d\n", - rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource); - return; - } - - r600_spi_update(rctx); - - mask = 0; - for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { - mask |= (0xF << (i * 4)); - } - - vgt.id = R600_PIPE_STATE_VGT; - vgt.nregs = 0; - r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw->index_bias, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw->max_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw->min_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL); - r600_context_pipe_state_set(&rctx->ctx, &vgt); - - rdraw.vgt_num_indices = draw->count; - rdraw.vgt_num_instances = 1; - rdraw.vgt_index_type = vgt_dma_index_type; - rdraw.vgt_draw_initiator = vgt_draw_initiator; - rdraw.indices = NULL; - if (draw->index_buffer) { - rbuffer = (struct r600_resource*)draw->index_buffer; - rdraw.indices = rbuffer->bo; - rdraw.indices_bo_offset = draw->index_buffer_offset; - } - r600_context_draw(&rctx->ctx, &rdraw); -} - -void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_drawl draw; - boolean translate = FALSE; - - memset(&draw, 0, sizeof(struct r600_drawl)); - draw.ctx = ctx; - draw.mode = info->mode; - draw.start = info->start; - draw.count = info->count; - if (info->indexed && rctx->index_buffer.buffer) { - draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; - draw.min_index = info->min_index; - draw.max_index = info->max_index; - draw.index_bias = info->index_bias; - - r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer, - &rctx->index_buffer.index_size, - &draw.start, - info->count); - - draw.index_size = rctx->index_buffer.index_size; - pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); - draw.index_buffer_offset = draw.start * draw.index_size; - draw.start = 0; - r600_upload_index_buffer(rctx, &draw); - } else { - draw.index_size = 0; - draw.index_buffer = NULL; - draw.min_index = info->min_index; - draw.max_index = info->max_index; - draw.index_bias = info->start; - } - r600_draw_common(&draw); - - if (translate) - r600_end_vertex_translate(rctx); - - pipe_resource_reference(&draw.index_buffer, NULL); -} - static void r600_set_blend_color(struct pipe_context *ctx, const struct pipe_blend_color *state) { @@ -341,9 +117,10 @@ static void r600_set_blend_color(struct pipe_context *ctx, static void *r600_create_blend_state(struct pipe_context *ctx, const struct pipe_blend_state *state) { + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_blend *blend = CALLOC_STRUCT(r600_pipe_blend); struct r600_pipe_state *rstate; - u32 color_control, target_mask; + u32 color_control = 0, target_mask; if (blend == NULL) { return NULL; @@ -353,7 +130,10 @@ static void *r600_create_blend_state(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_BLEND; target_mask = 0; - color_control = S_028808_PER_MRT_BLEND(1); + + /* R600 does not support per-MRT blends */ + if (rctx->family > CHIP_R600) + color_control |= S_028808_PER_MRT_BLEND(1); if (state->logicop_enable) { color_control |= (state->logicop_func << 16) | (state->logicop_func << 20); } else { @@ -376,8 +156,9 @@ static void *r600_create_blend_state(struct pipe_context *ctx, } } blend->cb_target_mask = target_mask; + /* MULTIWRITE_ENABLE is controlled by r600_pipe_shader_ps(). */ r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, - color_control, 0xFFFFFFFF, NULL); + color_control, 0xFFFFFFFD, NULL); for (int i = 0; i < 8; i++) { unsigned eqRGB = state->rt[i].rgb_func; @@ -403,10 +184,11 @@ static void *r600_create_blend_state(struct pipe_context *ctx, bc |= S_028804_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA)); } - r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL); - if (i == 0) { + /* R600 does not support per-MRT blends */ + if (rctx->family > CHIP_R600) + r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL); + if (i == 0) r600_pipe_state_add_reg(rstate, R_028804_CB_BLEND_CONTROL, bc, 0xFFFFFFFF, NULL); - } } return rstate; } @@ -424,10 +206,6 @@ static void *r600_create_dsa_state(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_DSA; /* depth TODO some of those db_shader_control field depend on shader adjust mask & add it to shader */ - /* db_shader_control is 0xFFFFFFBE as Z_EXPORT_ENABLE (bit 0) will be - * set by fragment shader if it export Z and KILL_ENABLE (bit 6) will - * be set if shader use texkill instruction - */ db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); stencil_ref_mask = 0; stencil_ref_mask_bf = 0; @@ -486,7 +264,10 @@ static void *r600_create_dsa_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_0286E4_SPI_FOG_FUNC_BIAS, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBE, NULL); + /* The DB_SHADER_CONTROL mask is 0xFFFFFFBC since Z_EXPORT_ENABLE, + * STENCIL_EXPORT_ENABLE and KILL_ENABLE are controlled by + * r600_pipe_shader_ps().*/ + r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL); r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028D10_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, 0xFFFFFFFF, NULL); @@ -582,11 +363,16 @@ static void *r600_create_sampler_state(struct pipe_context *ctx, { struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); union util_color uc; + uint32_t coord_trunc = 0; if (rstate == NULL) { return NULL; } + if ((state->mag_img_filter == PIPE_TEX_FILTER_NEAREST) || + (state->min_img_filter == PIPE_TEX_FILTER_NEAREST)) + coord_trunc = 1; + rstate->id = R600_PIPE_STATE_SAMPLER; util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0, @@ -603,7 +389,9 @@ static void *r600_create_sampler_state(struct pipe_context *ctx, S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, + S_03C008_MC_COORD_TRUNCATE(coord_trunc) | + S_03C008_TYPE(1), 0xFFFFFFFF, NULL); if (uc.ui) { r600_pipe_state_add_reg(rstate, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL); @@ -626,6 +414,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c uint32_t word4 = 0, yuv_format = 0, pitch = 0; unsigned char swizzle[4], array_mode = 0, tile_type = 0; struct r600_bo *bo[2]; + unsigned height, depth; if (resource == NULL) return NULL; @@ -643,7 +432,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c swizzle[1] = state->swizzle_g; swizzle[2] = state->swizzle_b; swizzle[3] = state->swizzle_a; - format = r600_translate_texformat(state->format, + format = r600_translate_texformat(ctx->screen, state->format, swizzle, &word4, &yuv_format); if (format == ~0) { @@ -653,22 +442,30 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c if (desc == NULL) { R600_ERR("unknow format %d\n", state->format); } - tmp = (struct r600_resource_texture*)texture; + tmp = (struct r600_resource_texture *)texture; + if (tmp->depth && !tmp->is_flushing_texture) { + r600_texture_depth_flush(ctx, texture, TRUE); + tmp = tmp->flushed_depth_texture; + } + + if (tmp->force_int_type) { + word4 &= C_038010_NUM_FORMAT_ALL; + word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); + } rbuffer = &tmp->resource; bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; - /* FIXME depth texture decompression */ - if (tmp->depth) { - r600_texture_depth_flush(ctx, texture); - tmp = (struct r600_resource_texture*)texture; - rbuffer = &tmp->flushed_depth_texture->resource; - bo[0] = rbuffer->bo; - bo[1] = rbuffer->bo; - } - pitch = align(tmp->pitch_in_pixels[0], 8); - if (tmp->tiled) { - array_mode = tmp->array_mode[0]; - tile_type = tmp->tile_type; + pitch = align(tmp->pitch_in_blocks[0] * util_format_get_blockwidth(state->format), 8); + array_mode = tmp->array_mode[0]; + tile_type = tmp->tile_type; + + height = texture->height0; + depth = texture->depth0; + if (texture->target == PIPE_TEXTURE_1D_ARRAY) { + height = 1; + depth = texture->array_size; + } else if (texture->target == PIPE_TEXTURE_2D_ARRAY) { + depth = texture->array_size; } /* FIXME properly handle first level != 0 */ @@ -679,22 +476,22 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c S_038000_PITCH((pitch / 8) - 1) | S_038000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, - S_038004_TEX_HEIGHT(texture->height0 - 1) | - S_038004_TEX_DEPTH(texture->depth0 - 1) | + S_038004_TEX_HEIGHT(height - 1) | + S_038004_TEX_DEPTH(depth - 1) | S_038004_DATA_FORMAT(format), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]); r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, - word4 | S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) | - S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) | + word4 | + S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_NO_ZERO) | S_038010_REQUEST_SIZE(1) | S_038010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, S_038014_LAST_LEVEL(state->u.tex.last_level) | - S_038014_BASE_ARRAY(0) | - S_038014_LAST_ARRAY(0), 0xFFFFFFFF, NULL); + S_038014_BASE_ARRAY(state->u.tex.first_layer) | + S_038014_LAST_ARRAY(state->u.tex.last_layer), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE), 0xFFFFFFFF, NULL); @@ -709,7 +506,8 @@ static void r600_set_vs_sampler_view(struct pipe_context *ctx, unsigned count, for (int i = 0; i < count; i++) { if (resource[i]) { - r600_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i); + r600_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, + i + R600_MAX_CONST_BUFFERS); } } } @@ -724,9 +522,11 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, for (i = 0; i < count; i++) { if (&rctx->ps_samplers.views[i]->base != views[i]) { if (resource[i]) - r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); + r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, + i + R600_MAX_CONST_BUFFERS); else - r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, + i + R600_MAX_CONST_BUFFERS); pipe_sampler_view_reference( (struct pipe_sampler_view **)&rctx->ps_samplers.views[i], @@ -736,7 +536,8 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, } for (i = count; i < NUM_TEX_UNITS; i++) { if (rctx->ps_samplers.views[i]) { - r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, + i + R600_MAX_CONST_BUFFERS); pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL); } } @@ -918,33 +719,55 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta unsigned offset; const struct util_format_description *desc; struct r600_bo *bo[3]; + int i; surf = (struct r600_surface *)state->cbufs[cb]; rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; + + if (rtex->depth && !rtex->is_flushing_texture) { + r600_texture_depth_flush(&rctx->context, state->cbufs[cb]->texture, TRUE); + rtex = rtex->flushed_depth_texture; + } + rbuffer = &rtex->resource; bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; bo[2] = rbuffer->bo; /* XXX quite sure for dx10+ hw don't need any offset hacks */ - offset = r600_texture_get_offset((struct r600_resource_texture *)state->cbufs[cb]->texture, + offset = r600_texture_get_offset(rtex, level, state->cbufs[cb]->u.tex.first_layer); - pitch = rtex->pitch_in_pixels[level] / 8 - 1; - slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; + pitch = rtex->pitch_in_blocks[level] / 8 - 1; + slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1; ntype = 0; - desc = util_format_description(rtex->resource.base.b.format); + desc = util_format_description(surf->base.format); if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) ntype = V_0280A0_NUMBER_SRGB; - format = r600_translate_colorformat(rtex->resource.base.b.format); - swap = r600_translate_colorswap(rtex->resource.base.b.format); + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + format = r600_translate_colorformat(surf->base.format); + swap = r600_translate_colorswap(surf->base.format); + + /* disable when gallium grows int textures */ + if ((format == FMT_32_32_32_32 || format == FMT_16_16_16_16) && rtex->force_int_type) + ntype = 4; + color_info = S_0280A0_FORMAT(format) | S_0280A0_COMP_SWAP(swap) | S_0280A0_ARRAY_MODE(rtex->array_mode[level]) | S_0280A0_BLEND_CLAMP(1) | S_0280A0_NUMBER_TYPE(ntype); - if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) - color_info |= S_0280A0_SOURCE_FORMAT(1); + + /* on R600 this can't be set if BLEND_CLAMP isn't set, + if BLEND_FLOAT32 is set of > 11 bits in a UNORM or SNORM */ + if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS && + desc->channel[i].size < 12) + color_info |= S_0280A0_SOURCE_FORMAT(V_0280A0_EXPORT_NORM); r600_pipe_state_add_reg(rstate, R_028040_CB_COLOR0_BASE + cb * 4, @@ -988,17 +811,14 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta surf = (struct r600_surface *)state->zsbuf; rtex = (struct r600_resource_texture*)state->zsbuf->texture; - rtex->tiled = 1; - rtex->array_mode[level] = 2; - rtex->tile_type = 1; - rtex->depth = 1; + rbuffer = &rtex->resource; /* XXX quite sure for dx10+ hw don't need any offset hacks */ offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture, level, state->zsbuf->u.tex.first_layer); - pitch = rtex->pitch_in_pixels[level] / 8 - 1; - slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; + pitch = rtex->pitch_in_blocks[level] / 8 - 1; + slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1; format = r600_translate_dbformat(state->zsbuf->texture->format); r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE, @@ -1029,8 +849,6 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, util_copy_framebuffer_state(&rctx->framebuffer, state); - rctx->pframebuffer = &rctx->framebuffer; - /* build states */ for (int i = 0; i < state->nr_cbufs; i++) { r600_cb(rctx, rstate, state, i); @@ -1116,48 +934,6 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, } } -static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, - struct pipe_resource *buffer) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_resource *rbuffer = (struct r600_resource*)buffer; - - /* Note that the state tracker can unbind constant buffers by - * passing NULL here. - */ - if (buffer == NULL) { - return; - } - - switch (shader) { - case PIPE_SHADER_VERTEX: - rctx->vs_const_buffer.nregs = 0; - r600_pipe_state_add_reg(&rctx->vs_const_buffer, - R_028180_ALU_CONST_BUFFER_SIZE_VS_0, - ALIGN_DIVUP(buffer->width0 >> 4, 16), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vs_const_buffer, - R_028980_ALU_CONST_CACHE_VS_0, - r600_bo_offset(rbuffer->bo) >> 8, 0xFFFFFFFF, rbuffer->bo); - r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); - break; - case PIPE_SHADER_FRAGMENT: - rctx->ps_const_buffer.nregs = 0; - r600_pipe_state_add_reg(&rctx->ps_const_buffer, - R_028140_ALU_CONST_BUFFER_SIZE_PS_0, - ALIGN_DIVUP(buffer->width0 >> 4, 16), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->ps_const_buffer, - R_028940_ALU_CONST_CACHE_PS_0, - r600_bo_offset(rbuffer->bo) >> 8, 0xFFFFFFFF, rbuffer->bo); - r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); - break; - default: - R600_ERR("unsupported %d\n", shader); - return; - } -} - void r600_init_state_functions(struct r600_pipe_context *rctx) { rctx->context.create_blend_state = r600_create_blend_state; @@ -1197,6 +973,7 @@ void r600_init_state_functions(struct r600_pipe_context *rctx) rctx->context.set_vertex_sampler_views = r600_set_vs_sampler_view; rctx->context.set_viewport_state = r600_set_viewport_state; rctx->context.sampler_view_destroy = r600_sampler_view_destroy; + rctx->context.redefine_user_buffer = u_default_redefine_user_buffer; } void r600_init_config(struct r600_pipe_context *rctx) @@ -1450,6 +1227,163 @@ void r600_init_config(struct r600_pipe_context *rctx) r600_context_pipe_state_set(&rctx->ctx, rstate); } +void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) +{ + struct r600_pipe_state *rstate = &shader->rstate; + struct r600_shader *rshader = &shader->shader; + unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control; + int pos_index = -1, face_index = -1; + + rstate->nregs = 0; + + for (i = 0; i < rshader->ninput; i++) { + if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) + pos_index = i; + if (rshader->input[i].name == TGSI_SEMANTIC_FACE) + face_index = i; + } + + db_shader_control = 0; + for (i = 0; i < rshader->noutput; i++) { + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) + db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1); + if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) + db_shader_control |= S_02880C_STENCIL_REF_EXPORT_ENABLE(1); + } + if (rshader->uses_kill) + db_shader_control |= S_02880C_KILL_ENABLE(1); + + exports_ps = 0; + num_cout = 0; + for (i = 0; i < rshader->noutput; i++) { + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || + rshader->output[i].name == TGSI_SEMANTIC_STENCIL) + exports_ps |= 1; + else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { + num_cout++; + } + } + exports_ps |= S_028854_EXPORT_COLORS(num_cout); + if (!exports_ps) { + /* always at least export 1 component per pixel */ + exports_ps = 2; + } + + spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | + S_0286CC_PERSP_GRADIENT_ENA(1); + spi_input_z = 0; + if (pos_index != -1) { + spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) | + S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | + S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) | + S_0286CC_BARYC_SAMPLE_CNTL(1)); + spi_input_z |= 1; + } + + spi_ps_in_control_1 = 0; + if (face_index != -1) { + spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | + S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); + } + + r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028840_SQ_PGM_START_PS, + r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); + r600_pipe_state_add_reg(rstate, + R_028850_SQ_PGM_RESOURCES_PS, + S_028868_NUM_GPRS(rshader->bc.ngpr) | + S_028868_STACK_SIZE(rshader->bc.nstack), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028854_SQ_PGM_EXPORTS_PS, + exports_ps, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0288CC_SQ_PGM_CF_OFFSET_PS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, + S_028808_MULTIWRITE_ENABLE(!!rshader->fs_write_all), + S_028808_MULTIWRITE_ENABLE(1), + NULL); + /* only set some bits here, the other bits are set in the dsa state */ + r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, + db_shader_control, + S_02880C_Z_EXPORT_ENABLE(1) | + S_02880C_STENCIL_REF_EXPORT_ENABLE(1) | + S_02880C_KILL_ENABLE(1), + NULL); + + r600_pipe_state_add_reg(rstate, + R_03E200_SQ_LOOP_CONST_0, 0x01000FFF, + 0xFFFFFFFF, NULL); +} + +void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) +{ + struct r600_pipe_state *rstate = &shader->rstate; + struct r600_shader *rshader = &shader->shader; + unsigned spi_vs_out_id[10]; + unsigned i, tmp; + + /* clear previous register */ + rstate->nregs = 0; + + /* so far never got proper semantic id from tgsi */ + /* FIXME better to move this in config things so they get emited + * only one time per cs + */ + for (i = 0; i < 10; i++) { + spi_vs_out_id[i] = 0; + } + for (i = 0; i < 32; i++) { + tmp = i << ((i & 3) * 8); + spi_vs_out_id[i / 4] |= tmp; + } + for (i = 0; i < 10; i++) { + r600_pipe_state_add_reg(rstate, + R_028614_SPI_VS_OUT_ID_0 + i * 4, + spi_vs_out_id[i], 0xFFFFFFFF, NULL); + } + + r600_pipe_state_add_reg(rstate, + R_0286C4_SPI_VS_OUT_CONFIG, + S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028868_SQ_PGM_RESOURCES_VS, + S_028868_NUM_GPRS(rshader->bc.ngpr) | + S_028868_STACK_SIZE(rshader->bc.nstack), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0288D0_SQ_PGM_CF_OFFSET_VS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028858_SQ_PGM_START_VS, + r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); + + r600_pipe_state_add_reg(rstate, + R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, + 0xFFFFFFFF, NULL); +} + +void r600_fetch_shader(struct r600_vertex_element *ve) +{ + struct r600_pipe_state *rstate; + + rstate = &ve->rstate; + rstate->id = R600_PIPE_STATE_FETCH_SHADER; + rstate->nregs = 0; + r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS, + r600_bo_offset(ve->fetch_shader) >> 8, + 0xFFFFFFFF, ve->fetch_shader); +} + void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx) { struct pipe_depth_stencil_alpha_state dsa; @@ -1487,3 +1421,25 @@ void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx) S_028D0C_COPY_CENTROID(1), NULL); return rstate; } + +void r600_pipe_set_buffer_resource(struct r600_pipe_context *rctx, + struct r600_pipe_state *rstate, + struct r600_resource *rbuffer, + unsigned offset, unsigned stride) +{ + r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, + offset, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, + rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, + S_038008_STRIDE(stride), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, + 0xC0000000, 0xFFFFFFFF, NULL); +} diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index c647e77b37..43dad0c802 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -27,7 +27,9 @@ #include <util/u_memory.h> #include <util/u_format.h> #include <pipebuffer/pb_buffer.h> +#include "pipe/p_shader_tokens.h" #include "r600_pipe.h" +#include "r600d.h" /* common state between evergreen and r600 */ void r600_bind_blend_state(struct pipe_context *ctx, void *state) @@ -121,17 +123,11 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) rctx->vertex_elements = v; if (v) { + u_vbuf_mgr_bind_vertex_elements(rctx->vbuf_mgr, state, + v->vmgr_elements); + rctx->states[v->rstate.id] = &v->rstate; r600_context_pipe_state_set(&rctx->ctx, &v->rstate); - if (rctx->family >= CHIP_CEDAR) { - evergreen_vertex_buffer_update(rctx); - } else { - r600_vertex_buffer_update(rctx); - } - } - - if (v) { -// rctx->vs_rebuild = TRUE; } } @@ -147,6 +143,7 @@ void r600_delete_vertex_element(struct pipe_context *ctx, void *state) rctx->vertex_elements = NULL; r600_bo_reference(rctx->radeon, &v->fetch_shader, NULL); + u_vbuf_mgr_destroy_vertex_elements(rctx->vbuf_mgr, v->vmgr_elements); FREE(state); } @@ -171,42 +168,28 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, const struct pipe_vertex_buffer *buffers) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct pipe_vertex_buffer *vbo; - unsigned max_index = (unsigned)-1; - - for (int i = 0; i < rctx->nvertex_buffer; i++) { - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); - } - memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); - - for (int i = 0; i < count; i++) { - vbo = (struct pipe_vertex_buffer*)&buffers[i]; - - rctx->vertex_buffer[i].buffer = NULL; - if (r600_buffer_is_user_buffer(buffers[i].buffer)) - rctx->any_user_vbs = TRUE; - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); + int i; - if (vbo->max_index == ~0) { - if (!vbo->stride) - vbo->max_index = 1; - else - vbo->max_index = (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride; + /* Zero states. */ + for (i = 0; i < count; i++) { + if (!buffers[i].buffer) { + if (rctx->family >= CHIP_CEDAR) { + evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); + } else { + r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); + } } - max_index = MIN2(vbo->max_index, max_index); } - rctx->nvertex_buffer = count; - rctx->vb_max_index = max_index; - if (rctx->family >= CHIP_CEDAR) { - evergreen_vertex_buffer_update(rctx); - } else { - r600_vertex_buffer_update(rctx); + for (; i < rctx->vbuf_mgr->nr_real_vertex_buffers; i++) { + if (rctx->family >= CHIP_CEDAR) { + evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); + } else { + r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); + } } -} - -#define FORMAT_REPLACE(what, withwhat) \ - case PIPE_FORMAT_##what: *format = PIPE_FORMAT_##withwhat; break + u_vbuf_mgr_set_vertex_buffers(rctx->vbuf_mgr, count, buffers); +} void *r600_create_vertex_elements(struct pipe_context *ctx, unsigned count, @@ -214,33 +197,15 @@ void *r600_create_vertex_elements(struct pipe_context *ctx, { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element); - enum pipe_format *format; - int i; assert(count < 32); if (!v) return NULL; v->count = count; - memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); - - for (i = 0; i < count; i++) { - v->hw_format[i] = v->elements[i].src_format; - format = &v->hw_format[i]; - - switch (*format) { - FORMAT_REPLACE(R64_FLOAT, R32_FLOAT); - FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT); - FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT); - FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT); - default:; - } - v->incompatible_layout = - v->incompatible_layout || - v->elements[i].src_format != v->hw_format[i]; - - v->hw_format_size[i] = align(util_format_get_blocksize(v->hw_format[i]), 4); - } + v->vmgr_elements = + u_vbuf_mgr_create_vertex_elements(rctx->vbuf_mgr, count, + elements, v->elements); if (r600_vertex_elements_build_fetch_shader(rctx, v)) { FREE(v); @@ -310,3 +275,274 @@ void r600_delete_vs_shader(struct pipe_context *ctx, void *state) r600_pipe_shader_destroy(ctx, shader); free(shader); } + +/* FIXME optimize away spi update when it's not needed */ +void r600_spi_update(struct r600_pipe_context *rctx) +{ + struct r600_pipe_shader *shader = rctx->ps_shader; + struct r600_pipe_state rstate; + struct r600_shader *rshader = &shader->shader; + unsigned i, tmp; + + rstate.nregs = 0; + for (i = 0; i < rshader->ninput; i++) { + tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); + + if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || + rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || + rshader->input[i].name == TGSI_SEMANTIC_POSITION) { + tmp |= S_028644_FLAT_SHADE(rctx->flatshade); + } + + if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && + rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { + tmp |= S_028644_PT_SPRITE_TEX(1); + } + + if (rctx->family < CHIP_CEDAR) { + if (rshader->input[i].centroid) + tmp |= S_028644_SEL_CENTROID(1); + + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) + tmp |= S_028644_SEL_LINEAR(1); + } + + r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); + } + r600_context_pipe_state_set(&rctx->ctx, &rstate); +} + +void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, + struct pipe_resource *buffer) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_resource_buffer *rbuffer = r600_buffer(buffer); + struct r600_pipe_state *rstate; + uint32_t offset; + + /* Note that the state tracker can unbind constant buffers by + * passing NULL here. + */ + if (buffer == NULL) { + return; + } + + r600_upload_const_buffer(rctx, &rbuffer, &offset); + offset += r600_bo_offset(rbuffer->r.bo); + + switch (shader) { + case PIPE_SHADER_VERTEX: + rctx->vs_const_buffer.nregs = 0; + r600_pipe_state_add_reg(&rctx->vs_const_buffer, + R_028180_ALU_CONST_BUFFER_SIZE_VS_0, + ALIGN_DIVUP(buffer->width0 >> 4, 16), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->vs_const_buffer, + R_028980_ALU_CONST_CACHE_VS_0, + offset >> 8, 0xFFFFFFFF, rbuffer->r.bo); + r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); + + rstate = &rctx->vs_const_buffer_resource[index]; + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; + if (rctx->family >= CHIP_CEDAR) { + evergreen_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index); + } else { + r600_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index); + } + break; + case PIPE_SHADER_FRAGMENT: + rctx->ps_const_buffer.nregs = 0; + r600_pipe_state_add_reg(&rctx->ps_const_buffer, + R_028140_ALU_CONST_BUFFER_SIZE_PS_0, + ALIGN_DIVUP(buffer->width0 >> 4, 16), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->ps_const_buffer, + R_028940_ALU_CONST_CACHE_PS_0, + offset >> 8, 0xFFFFFFFF, rbuffer->r.bo); + r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); + + rstate = &rctx->ps_const_buffer_resource[index]; + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; + if (rctx->family >= CHIP_CEDAR) { + evergreen_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index); + } else { + r600_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + r600_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index); + } + break; + default: + R600_ERR("unsupported %d\n", shader); + return; + } + + if (buffer != &rbuffer->r.b.b.b) + pipe_resource_reference((struct pipe_resource**)&rbuffer, NULL); +} + +static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) +{ + struct r600_pipe_state *rstate; + struct r600_resource *rbuffer; + struct pipe_vertex_buffer *vertex_buffer; + unsigned i, count, offset; + + if (rctx->vertex_elements->vbuffer_need_offset) { + /* one resource per vertex elements */ + count = rctx->vertex_elements->count; + } else { + /* bind vertex buffer once */ + count = rctx->vbuf_mgr->nr_real_vertex_buffers; + } + + for (i = 0 ; i < count; i++) { + rstate = &rctx->fs_resource[i]; + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; + + if (rctx->vertex_elements->vbuffer_need_offset) { + /* one resource per vertex elements */ + unsigned vbuffer_index; + vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index; + vertex_buffer = &rctx->vbuf_mgr->vertex_buffer[vbuffer_index]; + rbuffer = (struct r600_resource*)rctx->vbuf_mgr->real_vertex_buffer[vbuffer_index]; + offset = rctx->vertex_elements->vbuffer_offset[i]; + } else { + /* bind vertex buffer once */ + vertex_buffer = &rctx->vbuf_mgr->vertex_buffer[i]; + rbuffer = (struct r600_resource*)rctx->vbuf_mgr->real_vertex_buffer[i]; + offset = 0; + } + if (vertex_buffer == NULL || rbuffer == NULL) + continue; + offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo); + + if (rctx->family >= CHIP_CEDAR) { + evergreen_pipe_set_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride); + evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); + } else { + r600_pipe_set_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride); + r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); + } + } +} + +void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_resource *rbuffer; + u32 vgt_dma_index_type, vgt_draw_initiator, mask; + struct r600_draw rdraw; + struct r600_pipe_state vgt; + struct r600_drawl draw = {}; + unsigned prim; + + r600_flush_depth_textures(rctx); + u_vbuf_mgr_draw_begin(rctx->vbuf_mgr, info, NULL, NULL); + r600_vertex_buffer_update(rctx); + + draw.info = *info; + draw.ctx = ctx; + if (info->indexed && rctx->index_buffer.buffer) { + draw.info.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; + pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); + + r600_translate_index_buffer(rctx, &draw.index_buffer, + &rctx->index_buffer.index_size, + &draw.info.start, + info->count); + + draw.index_size = rctx->index_buffer.index_size; + draw.index_buffer_offset = draw.info.start * draw.index_size; + draw.info.start = 0; + + if (u_vbuf_resource(draw.index_buffer)->user_ptr) { + r600_upload_index_buffer(rctx, &draw); + } + } else { + draw.info.index_bias = info->start; + } + + switch (draw.index_size) { + case 2: + vgt_draw_initiator = 0; + vgt_dma_index_type = 0; + break; + case 4: + vgt_draw_initiator = 0; + vgt_dma_index_type = 1; + break; + case 0: + vgt_draw_initiator = 2; + vgt_dma_index_type = 0; + break; + default: + R600_ERR("unsupported index size %d\n", draw.index_size); + return; + } + if (r600_conv_pipe_prim(draw.info.mode, &prim)) + return; + if (unlikely(rctx->ps_shader == NULL)) { + R600_ERR("missing vertex shader\n"); + return; + } + if (unlikely(rctx->vs_shader == NULL)) { + R600_ERR("missing vertex shader\n"); + return; + } + /* there should be enough input */ + if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) { + R600_ERR("%d resources provided, expecting %d\n", + rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource); + return; + } + + r600_spi_update(rctx); + + mask = 0; + for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { + mask |= (0xF << (i * 4)); + } + + vgt.id = R600_PIPE_STATE_VGT; + vgt.nregs = 0; + r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw.info.index_bias, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw.info.max_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.info.min_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, draw.info.start_instance, 0xFFFFFFFF, NULL); + r600_context_pipe_state_set(&rctx->ctx, &vgt); + + rdraw.vgt_num_indices = draw.info.count; + rdraw.vgt_num_instances = draw.info.instance_count; + rdraw.vgt_index_type = vgt_dma_index_type; + rdraw.vgt_draw_initiator = vgt_draw_initiator; + rdraw.indices = NULL; + if (draw.index_buffer) { + rbuffer = (struct r600_resource*)draw.index_buffer; + rdraw.indices = rbuffer->bo; + rdraw.indices_bo_offset = draw.index_buffer_offset; + } + + if (rctx->family >= CHIP_CEDAR) { + evergreen_context_draw(&rctx->ctx, &rdraw); + } else { + r600_context_draw(&rctx->ctx, &rdraw); + } + + if (rctx->framebuffer.zsbuf) + { + struct pipe_resource *tex = rctx->framebuffer.zsbuf->texture; + ((struct r600_resource_texture *)tex)->dirty_db = TRUE; + } + + pipe_resource_reference(&draw.index_buffer, NULL); + + u_vbuf_mgr_draw_end(rctx->vbuf_mgr); +} diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index d994196e19..3d0360485a 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -253,9 +253,13 @@ static inline unsigned r600_tex_dim(unsigned dim) default: case PIPE_TEXTURE_1D: return V_038000_SQ_TEX_DIM_1D; + case PIPE_TEXTURE_1D_ARRAY: + return V_038000_SQ_TEX_DIM_1D_ARRAY; case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: return V_038000_SQ_TEX_DIM_2D; + case PIPE_TEXTURE_2D_ARRAY: + return V_038000_SQ_TEX_DIM_2D_ARRAY; case PIPE_TEXTURE_3D: return V_038000_SQ_TEX_DIM_3D; case PIPE_TEXTURE_CUBE: @@ -285,10 +289,14 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) return V_0280A0_SWAP_ALT_REV; case PIPE_FORMAT_I8_UNORM: case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SRGB: case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return V_0280A0_SWAP_STD; + case PIPE_FORMAT_L4A4_UNORM: + return V_0280A0_SWAP_ALT; + /* 16-bit buffers. */ case PIPE_FORMAT_B5G6R5_UNORM: return V_0280A0_SWAP_STD_REV; @@ -305,6 +313,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) return V_0280A0_SWAP_STD; case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SRGB: return V_0280A0_SWAP_ALT; case PIPE_FORMAT_R8G8_UNORM: return V_0280A0_SWAP_STD; @@ -327,6 +336,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_X8R8G8B8_UNORM: return V_0280A0_SWAP_ALT_REV; case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R8G8B8X8_UNORM: return V_0280A0_SWAP_STD; @@ -345,9 +355,11 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_R10G10B10X2_SNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return V_0280A0_SWAP_STD_REV; + return V_0280A0_SWAP_STD; + + case PIPE_FORMAT_B10G10R10A2_UNORM: + return V_0280A0_SWAP_ALT; case PIPE_FORMAT_R16G16_UNORM: return V_0280A0_SWAP_STD; @@ -355,14 +367,13 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: - // return FMT_16_16_16_16; case PIPE_FORMAT_R16G16B16A16_FLOAT: - // return FMT_16_16_16_16_FLOAT; /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_FLOAT: - // return FMT_32_32_32_32_FLOAT; - return 0; + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + return V_0280A0_SWAP_STD; default: R600_ERR("unsupported colorswap format %d\n", format); return ~0; @@ -373,10 +384,14 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) { switch (format) { + case PIPE_FORMAT_L4A4_UNORM: + return V_0280A0_COLOR_4_4; + /* 8-bit buffers. */ case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SRGB: case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return V_0280A0_COLOR_8; @@ -397,6 +412,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) return V_0280A0_COLOR_16; case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SRGB: case PIPE_FORMAT_R8G8_UNORM: return V_0280A0_COLOR_8_8; @@ -423,7 +439,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R10G10B10X2_SNORM: case PIPE_FORMAT_B10G10R10A2_UNORM: case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return V_0280A0_COLOR_10_10_10_2; + return V_0280A0_COLOR_2_10_10_10; case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_USCALED: @@ -469,6 +485,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) return V_0280A0_COLOR_32_32_32_FLOAT; case PIPE_FORMAT_R32G32B32A32_FLOAT: return V_0280A0_COLOR_32_32_32_32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + return V_0280A0_COLOR_32_32_32_32; /* YUV buffers. */ case PIPE_FORMAT_UYVY: @@ -479,9 +498,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) } } -static INLINE boolean r600_is_sampler_format_supported(enum pipe_format format) +static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) { - return r600_translate_texformat(format, NULL, NULL, NULL) != ~0; + return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0; } static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format) @@ -495,21 +514,13 @@ static INLINE boolean r600_is_zs_format_supported(enum pipe_format format) return r600_translate_dbformat(format) != ~0; } -static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format) +static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format, + enum radeon_family family) { - return r600_translate_colorformat(format) != ~0; -} - -static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format) -{ - uint32_t result = 0; - const struct util_format_description *desc; unsigned i; - - desc = util_format_description(format); - if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { - goto out_unknown; - } + const struct util_format_description *desc = util_format_description(format); + if (!desc) + return FALSE; /* Find the first non-VOID channel. */ for (i = 0; i < 4; i++) { @@ -517,122 +528,23 @@ static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format) break; } } - - switch (desc->channel[i].type) { - /* Half-floats, floats, doubles */ - case UTIL_FORMAT_TYPE_FLOAT: - switch (desc->channel[i].size) { - case 16: - switch (desc->nr_channels) { - case 1: - result = FMT_16_FLOAT; - break; - case 2: - result = FMT_16_16_FLOAT; - break; - case 3: - result = FMT_16_16_16_FLOAT; - break; - case 4: - result = FMT_16_16_16_16_FLOAT; - break; - } - break; - case 32: - switch (desc->nr_channels) { - case 1: - result = FMT_32_FLOAT; - break; - case 2: - result = FMT_32_32_FLOAT; - break; - case 3: - result = FMT_32_32_32_FLOAT; - break; - case 4: - result = FMT_32_32_32_32_FLOAT; - break; - } - break; - default: - goto out_unknown; - } - break; - /* Unsigned ints */ - case UTIL_FORMAT_TYPE_UNSIGNED: - /* Signed ints */ - case UTIL_FORMAT_TYPE_SIGNED: - switch (desc->channel[i].size) { - case 8: - switch (desc->nr_channels) { - case 1: - result = FMT_8; - break; - case 2: - result = FMT_8_8; - break; - case 3: - // result = FMT_8_8_8; /* fails piglit draw-vertices test */ - // break; - case 4: - result = FMT_8_8_8_8; - break; - } - break; - case 16: - switch (desc->nr_channels) { - case 1: - result = FMT_16; - break; - case 2: - result = FMT_16_16; - break; - case 3: - // result = FMT_16_16_16; /* fails piglit draw-vertices test */ - // break; - case 4: - result = FMT_16_16_16_16; - break; - } - break; - case 32: - switch (desc->nr_channels) { - case 1: - result = FMT_32; - break; - case 2: - result = FMT_32_32; - break; - case 3: - result = FMT_32_32_32; - break; - case 4: - result = FMT_32_32_32_32; - break; - } - break; - default: - goto out_unknown; - } - break; - default: - goto out_unknown; - } - - result = S_038008_DATA_FORMAT(result); - - if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { - result |= S_038008_FORMAT_COMP_ALL(1); - } - if (desc->channel[i].normalized) { - result |= S_038008_NUM_FORMAT_ALL(0); - } else { - result |= S_038008_NUM_FORMAT_ALL(2); - } - return result; -out_unknown: - R600_ERR("unsupported vertex format %s\n", util_format_name(format)); - return ~0; + if (i == 4) + return FALSE; + + /* No fixed, no double. */ + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || + desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED || + (desc->channel[i].size == 64 && + desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)) + return FALSE; + + /* No scaled/norm formats with 32 bits per channel. */ + if (desc->channel[i].size == 32 && + (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED || + desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)) + return FALSE; + + return TRUE; } #endif diff --git a/src/gallium/drivers/r600/r600_states_inc.h b/src/gallium/drivers/r600/r600_states_inc.h deleted file mode 100644 index 1c8075ebdb..0000000000 --- a/src/gallium/drivers/r600/r600_states_inc.h +++ /dev/null @@ -1,543 +0,0 @@ -/* This file is autogenerated from r600_states.h - do not edit directly */ -/* autogenerating script is gen_r600_states.py */ - -/* R600_CONFIG */ -#define R600_CONFIG__SQ_CONFIG 0 -#define R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1 1 -#define R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2 2 -#define R600_CONFIG__SQ_THREAD_RESOURCE_MGMT 3 -#define R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1 4 -#define R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2 5 -#define R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ 6 -#define R600_CONFIG__TA_CNTL_AUX 7 -#define R600_CONFIG__VC_ENHANCE 8 -#define R600_CONFIG__DB_DEBUG 9 -#define R600_CONFIG__DB_WATERMARKS 10 -#define R600_CONFIG__SX_MISC 11 -#define R600_CONFIG__SPI_THREAD_GROUPING 12 -#define R600_CONFIG__SQ_ESGS_RING_ITEMSIZE 13 -#define R600_CONFIG__SQ_GSVS_RING_ITEMSIZE 14 -#define R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE 15 -#define R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE 16 -#define R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE 17 -#define R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE 18 -#define R600_CONFIG__SQ_FBUF_RING_ITEMSIZE 19 -#define R600_CONFIG__SQ_REDUC_RING_ITEMSIZE 20 -#define R600_CONFIG__SQ_GS_VERT_ITEMSIZE 21 -#define R600_CONFIG__VGT_OUTPUT_PATH_CNTL 22 -#define R600_CONFIG__VGT_HOS_CNTL 23 -#define R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL 24 -#define R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL 25 -#define R600_CONFIG__VGT_HOS_REUSE_DEPTH 26 -#define R600_CONFIG__VGT_GROUP_PRIM_TYPE 27 -#define R600_CONFIG__VGT_GROUP_FIRST_DECR 28 -#define R600_CONFIG__VGT_GROUP_DECR 29 -#define R600_CONFIG__VGT_GROUP_VECT_0_CNTL 30 -#define R600_CONFIG__VGT_GROUP_VECT_1_CNTL 31 -#define R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL 32 -#define R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL 33 -#define R600_CONFIG__VGT_GS_MODE 34 -#define R600_CONFIG__PA_SC_MODE_CNTL 35 -#define R600_CONFIG__VGT_STRMOUT_EN 36 -#define R600_CONFIG__VGT_REUSE_OFF 37 -#define R600_CONFIG__VGT_VTX_CNT_EN 38 -#define R600_CONFIG__VGT_STRMOUT_BUFFER_EN 39 -#define R600_CONFIG_SIZE 40 -#define R600_CONFIG_PM4 128 - -/* R600_CB_CNTL */ -#define R600_CB_CNTL__CB_CLEAR_RED 0 -#define R600_CB_CNTL__CB_CLEAR_GREEN 1 -#define R600_CB_CNTL__CB_CLEAR_BLUE 2 -#define R600_CB_CNTL__CB_CLEAR_ALPHA 3 -#define R600_CB_CNTL__CB_SHADER_MASK 4 -#define R600_CB_CNTL__CB_TARGET_MASK 5 -#define R600_CB_CNTL__CB_FOG_RED 6 -#define R600_CB_CNTL__CB_FOG_GREEN 7 -#define R600_CB_CNTL__CB_FOG_BLUE 8 -#define R600_CB_CNTL__CB_COLOR_CONTROL 9 -#define R600_CB_CNTL__PA_SC_AA_CONFIG 10 -#define R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX 11 -#define R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX 12 -#define R600_CB_CNTL__CB_CLRCMP_CONTROL 13 -#define R600_CB_CNTL__CB_CLRCMP_SRC 14 -#define R600_CB_CNTL__CB_CLRCMP_DST 15 -#define R600_CB_CNTL__CB_CLRCMP_MSK 16 -#define R600_CB_CNTL__PA_SC_AA_MASK 17 -#define R600_CB_CNTL__CB_SHADER_CONTROL 18 -#define R600_CB_CNTL_SIZE 19 -#define R600_CB_CNTL_PM4 128 - -/* R600_RASTERIZER */ -#define R600_RASTERIZER__SPI_INTERP_CONTROL_0 0 -#define R600_RASTERIZER__PA_CL_CLIP_CNTL 1 -#define R600_RASTERIZER__PA_SU_SC_MODE_CNTL 2 -#define R600_RASTERIZER__PA_CL_VS_OUT_CNTL 3 -#define R600_RASTERIZER__PA_CL_NANINF_CNTL 4 -#define R600_RASTERIZER__PA_SU_POINT_SIZE 5 -#define R600_RASTERIZER__PA_SU_POINT_MINMAX 6 -#define R600_RASTERIZER__PA_SU_LINE_CNTL 7 -#define R600_RASTERIZER__PA_SC_LINE_STIPPLE 8 -#define R600_RASTERIZER__PA_SC_MPASS_PS_CNTL 9 -#define R600_RASTERIZER__PA_SC_LINE_CNTL 10 -#define R600_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ 11 -#define R600_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ 12 -#define R600_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ 13 -#define R600_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ 14 -#define R600_RASTERIZER__PA_SU_POLY_OFFSET_DB_FMT_CNTL 15 -#define R600_RASTERIZER__PA_SU_POLY_OFFSET_CLAMP 16 -#define R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_SCALE 17 -#define R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET 18 -#define R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE 19 -#define R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET 20 -#define R600_RASTERIZER_SIZE 21 -#define R600_RASTERIZER_PM4 128 - -/* R600_VIEWPORT */ -#define R600_VIEWPORT__PA_SC_VPORT_ZMIN_0 0 -#define R600_VIEWPORT__PA_SC_VPORT_ZMAX_0 1 -#define R600_VIEWPORT__PA_CL_VPORT_XSCALE_0 2 -#define R600_VIEWPORT__PA_CL_VPORT_YSCALE_0 3 -#define R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0 4 -#define R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0 5 -#define R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0 6 -#define R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0 7 -#define R600_VIEWPORT__PA_CL_VTE_CNTL 8 -#define R600_VIEWPORT_SIZE 9 -#define R600_VIEWPORT_PM4 128 - -/* R600_SCISSOR */ -#define R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL 0 -#define R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR 1 -#define R600_SCISSOR__PA_SC_WINDOW_OFFSET 2 -#define R600_SCISSOR__PA_SC_WINDOW_SCISSOR_TL 3 -#define R600_SCISSOR__PA_SC_WINDOW_SCISSOR_BR 4 -#define R600_SCISSOR__PA_SC_CLIPRECT_RULE 5 -#define R600_SCISSOR__PA_SC_CLIPRECT_0_TL 6 -#define R600_SCISSOR__PA_SC_CLIPRECT_0_BR 7 -#define R600_SCISSOR__PA_SC_CLIPRECT_1_TL 8 -#define R600_SCISSOR__PA_SC_CLIPRECT_1_BR 9 -#define R600_SCISSOR__PA_SC_CLIPRECT_2_TL 10 -#define R600_SCISSOR__PA_SC_CLIPRECT_2_BR 11 -#define R600_SCISSOR__PA_SC_CLIPRECT_3_TL 12 -#define R600_SCISSOR__PA_SC_CLIPRECT_3_BR 13 -#define R600_SCISSOR__PA_SC_EDGERULE 14 -#define R600_SCISSOR__PA_SC_GENERIC_SCISSOR_TL 15 -#define R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR 16 -#define R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL 17 -#define R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR 18 -#define R600_SCISSOR_SIZE 19 -#define R600_SCISSOR_PM4 128 - -/* R600_BLEND */ -#define R600_BLEND__CB_BLEND_RED 0 -#define R600_BLEND__CB_BLEND_GREEN 1 -#define R600_BLEND__CB_BLEND_BLUE 2 -#define R600_BLEND__CB_BLEND_ALPHA 3 -#define R600_BLEND__CB_BLEND0_CONTROL 4 -#define R600_BLEND__CB_BLEND1_CONTROL 5 -#define R600_BLEND__CB_BLEND2_CONTROL 6 -#define R600_BLEND__CB_BLEND3_CONTROL 7 -#define R600_BLEND__CB_BLEND4_CONTROL 8 -#define R600_BLEND__CB_BLEND5_CONTROL 9 -#define R600_BLEND__CB_BLEND6_CONTROL 10 -#define R600_BLEND__CB_BLEND7_CONTROL 11 -#define R600_BLEND__CB_BLEND_CONTROL 12 -#define R600_BLEND_SIZE 13 -#define R600_BLEND_PM4 128 - -/* R600_DSA */ -#define R600_DSA__DB_STENCIL_CLEAR 0 -#define R600_DSA__DB_DEPTH_CLEAR 1 -#define R600_DSA__SX_ALPHA_TEST_CONTROL 2 -#define R600_DSA__DB_STENCILREFMASK 3 -#define R600_DSA__DB_STENCILREFMASK_BF 4 -#define R600_DSA__SX_ALPHA_REF 5 -#define R600_DSA__SPI_FOG_FUNC_SCALE 6 -#define R600_DSA__SPI_FOG_FUNC_BIAS 7 -#define R600_DSA__SPI_FOG_CNTL 8 -#define R600_DSA__DB_DEPTH_CONTROL 9 -#define R600_DSA__DB_SHADER_CONTROL 10 -#define R600_DSA__DB_RENDER_CONTROL 11 -#define R600_DSA__DB_RENDER_OVERRIDE 12 -#define R600_DSA__DB_SRESULTS_COMPARE_STATE1 13 -#define R600_DSA__DB_PRELOAD_CONTROL 14 -#define R600_DSA__DB_ALPHA_TO_MASK 15 -#define R600_DSA_SIZE 16 -#define R600_DSA_PM4 128 - -/* R600_VS_SHADER */ -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_0 0 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_1 1 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_2 2 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_3 3 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_4 4 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_5 5 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_6 6 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_7 7 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_8 8 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_9 9 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_10 10 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_11 11 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_12 12 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_13 13 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_14 14 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_15 15 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_16 16 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_17 17 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_18 18 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_19 19 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_20 20 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_21 21 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_22 22 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_23 23 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_24 24 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_25 25 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_26 26 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_27 27 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_28 28 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_29 29 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_30 30 -#define R600_VS_SHADER__SQ_VTX_SEMANTIC_31 31 -#define R600_VS_SHADER__SPI_VS_OUT_ID_0 32 -#define R600_VS_SHADER__SPI_VS_OUT_ID_1 33 -#define R600_VS_SHADER__SPI_VS_OUT_ID_2 34 -#define R600_VS_SHADER__SPI_VS_OUT_ID_3 35 -#define R600_VS_SHADER__SPI_VS_OUT_ID_4 36 -#define R600_VS_SHADER__SPI_VS_OUT_ID_5 37 -#define R600_VS_SHADER__SPI_VS_OUT_ID_6 38 -#define R600_VS_SHADER__SPI_VS_OUT_ID_7 39 -#define R600_VS_SHADER__SPI_VS_OUT_ID_8 40 -#define R600_VS_SHADER__SPI_VS_OUT_ID_9 41 -#define R600_VS_SHADER__SPI_VS_OUT_CONFIG 42 -#define R600_VS_SHADER__SQ_PGM_START_VS 43 -#define R600_VS_SHADER__SQ_PGM_RESOURCES_VS 44 -#define R600_VS_SHADER__SQ_PGM_START_FS 45 -#define R600_VS_SHADER__SQ_PGM_RESOURCES_FS 46 -#define R600_VS_SHADER__SQ_PGM_CF_OFFSET_VS 47 -#define R600_VS_SHADER__SQ_PGM_CF_OFFSET_FS 48 -#define R600_VS_SHADER_SIZE 49 -#define R600_VS_SHADER_PM4 128 - -/* R600_PS_SHADER */ -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 0 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_1 1 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_2 2 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_3 3 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_4 4 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_5 5 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_6 6 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_7 7 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_8 8 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_9 9 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_10 10 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_11 11 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_12 12 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_13 13 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_14 14 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_15 15 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_16 16 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_17 17 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_18 18 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_19 19 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_20 20 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_21 21 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_22 22 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_23 23 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_24 24 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_25 25 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_26 26 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_27 27 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_28 28 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_29 29 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_30 30 -#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_31 31 -#define R600_PS_SHADER__SPI_PS_IN_CONTROL_0 32 -#define R600_PS_SHADER__SPI_PS_IN_CONTROL_1 33 -#define R600_PS_SHADER__SPI_INPUT_Z 34 -#define R600_PS_SHADER__SQ_PGM_START_PS 35 -#define R600_PS_SHADER__SQ_PGM_RESOURCES_PS 36 -#define R600_PS_SHADER__SQ_PGM_EXPORTS_PS 37 -#define R600_PS_SHADER__SQ_PGM_CF_OFFSET_PS 38 -#define R600_PS_SHADER_SIZE 39 -#define R600_PS_SHADER_PM4 128 - -/* R600_VS_CBUF */ -#define R600_VS_CBUF__ALU_CONST_BUFFER_SIZE_VS_0 0 -#define R600_VS_CBUF__ALU_CONST_CACHE_VS_0 1 -#define R600_VS_CBUF_SIZE 2 -#define R600_VS_CBUF_PM4 128 - -/* R600_PS_CBUF */ -#define R600_PS_CBUF__ALU_CONST_BUFFER_SIZE_PS_0 0 -#define R600_PS_CBUF__ALU_CONST_CACHE_PS_0 1 -#define R600_PS_CBUF_SIZE 2 -#define R600_PS_CBUF_PM4 128 - -/* R600_PS_CONSTANT */ -#define R600_PS_CONSTANT__SQ_ALU_CONSTANT0_0 0 -#define R600_PS_CONSTANT__SQ_ALU_CONSTANT1_0 1 -#define R600_PS_CONSTANT__SQ_ALU_CONSTANT2_0 2 -#define R600_PS_CONSTANT__SQ_ALU_CONSTANT3_0 3 -#define R600_PS_CONSTANT_SIZE 4 -#define R600_PS_CONSTANT_PM4 128 - -/* R600_VS_CONSTANT */ -#define R600_VS_CONSTANT__SQ_ALU_CONSTANT0_256 0 -#define R600_VS_CONSTANT__SQ_ALU_CONSTANT1_256 1 -#define R600_VS_CONSTANT__SQ_ALU_CONSTANT2_256 2 -#define R600_VS_CONSTANT__SQ_ALU_CONSTANT3_256 3 -#define R600_VS_CONSTANT_SIZE 4 -#define R600_VS_CONSTANT_PM4 128 - -/* R600_UCP */ -#define R600_UCP__PA_CL_UCP0_X 0 -#define R600_UCP__PA_CL_UCP0_Y 1 -#define R600_UCP__PA_CL_UCP0_Z 2 -#define R600_UCP__PA_CL_UCP0_W 3 -#define R600_UCP__PA_CL_UCP1_X 4 -#define R600_UCP__PA_CL_UCP1_Y 5 -#define R600_UCP__PA_CL_UCP1_Z 6 -#define R600_UCP__PA_CL_UCP1_W 7 -#define R600_UCP__PA_CL_UCP2_X 8 -#define R600_UCP__PA_CL_UCP2_Y 9 -#define R600_UCP__PA_CL_UCP2_Z 10 -#define R600_UCP__PA_CL_UCP2_W 11 -#define R600_UCP__PA_CL_UCP3_X 12 -#define R600_UCP__PA_CL_UCP3_Y 13 -#define R600_UCP__PA_CL_UCP3_Z 14 -#define R600_UCP__PA_CL_UCP3_W 15 -#define R600_UCP__PA_CL_UCP4_X 16 -#define R600_UCP__PA_CL_UCP4_Y 17 -#define R600_UCP__PA_CL_UCP4_Z 18 -#define R600_UCP__PA_CL_UCP4_W 19 -#define R600_UCP__PA_CL_UCP5_X 20 -#define R600_UCP__PA_CL_UCP5_Y 21 -#define R600_UCP__PA_CL_UCP5_Z 22 -#define R600_UCP__PA_CL_UCP5_W 23 -#define R600_UCP_SIZE 24 -#define R600_UCP_PM4 128 - -/* R600_PS_RESOURCE */ -#define R600_PS_RESOURCE__RESOURCE0_WORD0 0 -#define R600_PS_RESOURCE__RESOURCE0_WORD1 1 -#define R600_PS_RESOURCE__RESOURCE0_WORD2 2 -#define R600_PS_RESOURCE__RESOURCE0_WORD3 3 -#define R600_PS_RESOURCE__RESOURCE0_WORD4 4 -#define R600_PS_RESOURCE__RESOURCE0_WORD5 5 -#define R600_PS_RESOURCE__RESOURCE0_WORD6 6 -#define R600_PS_RESOURCE_SIZE 7 -#define R600_PS_RESOURCE_PM4 128 - -/* R600_VS_RESOURCE */ -#define R600_VS_RESOURCE__RESOURCE160_WORD0 0 -#define R600_VS_RESOURCE__RESOURCE160_WORD1 1 -#define R600_VS_RESOURCE__RESOURCE160_WORD2 2 -#define R600_VS_RESOURCE__RESOURCE160_WORD3 3 -#define R600_VS_RESOURCE__RESOURCE160_WORD4 4 -#define R600_VS_RESOURCE__RESOURCE160_WORD5 5 -#define R600_VS_RESOURCE__RESOURCE160_WORD6 6 -#define R600_VS_RESOURCE_SIZE 7 -#define R600_VS_RESOURCE_PM4 128 - -/* R600_FS_RESOURCE */ -#define R600_FS_RESOURCE__RESOURCE320_WORD0 0 -#define R600_FS_RESOURCE__RESOURCE320_WORD1 1 -#define R600_FS_RESOURCE__RESOURCE320_WORD2 2 -#define R600_FS_RESOURCE__RESOURCE320_WORD3 3 -#define R600_FS_RESOURCE__RESOURCE320_WORD4 4 -#define R600_FS_RESOURCE__RESOURCE320_WORD5 5 -#define R600_FS_RESOURCE__RESOURCE320_WORD6 6 -#define R600_FS_RESOURCE_SIZE 7 -#define R600_FS_RESOURCE_PM4 128 - -/* R600_GS_RESOURCE */ -#define R600_GS_RESOURCE__RESOURCE336_WORD0 0 -#define R600_GS_RESOURCE__RESOURCE336_WORD1 1 -#define R600_GS_RESOURCE__RESOURCE336_WORD2 2 -#define R600_GS_RESOURCE__RESOURCE336_WORD3 3 -#define R600_GS_RESOURCE__RESOURCE336_WORD4 4 -#define R600_GS_RESOURCE__RESOURCE336_WORD5 5 -#define R600_GS_RESOURCE__RESOURCE336_WORD6 6 -#define R600_GS_RESOURCE_SIZE 7 -#define R600_GS_RESOURCE_PM4 128 - -/* R600_PS_SAMPLER */ -#define R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0 0 -#define R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0 1 -#define R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0 2 -#define R600_PS_SAMPLER_SIZE 3 -#define R600_PS_SAMPLER_PM4 128 - -/* R600_VS_SAMPLER */ -#define R600_VS_SAMPLER__SQ_TEX_SAMPLER_WORD0_18 0 -#define R600_VS_SAMPLER__SQ_TEX_SAMPLER_WORD1_18 1 -#define R600_VS_SAMPLER__SQ_TEX_SAMPLER_WORD2_18 2 -#define R600_VS_SAMPLER_SIZE 3 -#define R600_VS_SAMPLER_PM4 128 - -/* R600_GS_SAMPLER */ -#define R600_GS_SAMPLER__SQ_TEX_SAMPLER_WORD0_36 0 -#define R600_GS_SAMPLER__SQ_TEX_SAMPLER_WORD1_36 1 -#define R600_GS_SAMPLER__SQ_TEX_SAMPLER_WORD2_36 2 -#define R600_GS_SAMPLER_SIZE 3 -#define R600_GS_SAMPLER_PM4 128 - -/* R600_PS_SAMPLER_BORDER */ -#define R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED 0 -#define R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN 1 -#define R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE 2 -#define R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_ALPHA 3 -#define R600_PS_SAMPLER_BORDER_SIZE 4 -#define R600_PS_SAMPLER_BORDER_PM4 128 - -/* R600_VS_SAMPLER_BORDER */ -#define R600_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_RED 0 -#define R600_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_GREEN 1 -#define R600_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_BLUE 2 -#define R600_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_ALPHA 3 -#define R600_VS_SAMPLER_BORDER_SIZE 4 -#define R600_VS_SAMPLER_BORDER_PM4 128 - -/* R600_GS_SAMPLER_BORDER */ -#define R600_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_RED 0 -#define R600_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_GREEN 1 -#define R600_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_BLUE 2 -#define R600_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_ALPHA 3 -#define R600_GS_SAMPLER_BORDER_SIZE 4 -#define R600_GS_SAMPLER_BORDER_PM4 128 - -/* R600_CB0 */ -#define R600_CB0__CB_COLOR0_BASE 0 -#define R600_CB0__CB_COLOR0_INFO 1 -#define R600_CB0__CB_COLOR0_SIZE 2 -#define R600_CB0__CB_COLOR0_VIEW 3 -#define R600_CB0__CB_COLOR0_FRAG 4 -#define R600_CB0__CB_COLOR0_TILE 5 -#define R600_CB0__CB_COLOR0_MASK 6 -#define R600_CB0_SIZE 7 -#define R600_CB0_PM4 128 - -/* R600_CB1 */ -#define R600_CB1__CB_COLOR1_BASE 0 -#define R600_CB1__CB_COLOR1_INFO 1 -#define R600_CB1__CB_COLOR1_SIZE 2 -#define R600_CB1__CB_COLOR1_VIEW 3 -#define R600_CB1__CB_COLOR1_FRAG 4 -#define R600_CB1__CB_COLOR1_TILE 5 -#define R600_CB1__CB_COLOR1_MASK 6 -#define R600_CB1_SIZE 7 -#define R600_CB1_PM4 128 - -/* R600_CB2 */ -#define R600_CB2__CB_COLOR2_BASE 0 -#define R600_CB2__CB_COLOR2_INFO 1 -#define R600_CB2__CB_COLOR2_SIZE 2 -#define R600_CB2__CB_COLOR2_VIEW 3 -#define R600_CB2__CB_COLOR2_FRAG 4 -#define R600_CB2__CB_COLOR2_TILE 5 -#define R600_CB2__CB_COLOR2_MASK 6 -#define R600_CB2_SIZE 7 -#define R600_CB2_PM4 128 - -/* R600_CB3 */ -#define R600_CB3__CB_COLOR3_BASE 0 -#define R600_CB3__CB_COLOR3_INFO 1 -#define R600_CB3__CB_COLOR3_SIZE 2 -#define R600_CB3__CB_COLOR3_VIEW 3 -#define R600_CB3__CB_COLOR3_FRAG 4 -#define R600_CB3__CB_COLOR3_TILE 5 -#define R600_CB3__CB_COLOR3_MASK 6 -#define R600_CB3_SIZE 7 -#define R600_CB3_PM4 128 - -/* R600_CB4 */ -#define R600_CB4__CB_COLOR4_BASE 0 -#define R600_CB4__CB_COLOR4_INFO 1 -#define R600_CB4__CB_COLOR4_SIZE 2 -#define R600_CB4__CB_COLOR4_VIEW 3 -#define R600_CB4__CB_COLOR4_FRAG 4 -#define R600_CB4__CB_COLOR4_TILE 5 -#define R600_CB4__CB_COLOR4_MASK 6 -#define R600_CB4_SIZE 7 -#define R600_CB4_PM4 128 - -/* R600_CB5 */ -#define R600_CB5__CB_COLOR5_BASE 0 -#define R600_CB5__CB_COLOR5_INFO 1 -#define R600_CB5__CB_COLOR5_SIZE 2 -#define R600_CB5__CB_COLOR5_VIEW 3 -#define R600_CB5__CB_COLOR5_FRAG 4 -#define R600_CB5__CB_COLOR5_TILE 5 -#define R600_CB5__CB_COLOR5_MASK 6 -#define R600_CB5_SIZE 7 -#define R600_CB5_PM4 128 - -/* R600_CB6 */ -#define R600_CB6__CB_COLOR6_BASE 0 -#define R600_CB6__CB_COLOR6_INFO 1 -#define R600_CB6__CB_COLOR6_SIZE 2 -#define R600_CB6__CB_COLOR6_VIEW 3 -#define R600_CB6__CB_COLOR6_FRAG 4 -#define R600_CB6__CB_COLOR6_TILE 5 -#define R600_CB6__CB_COLOR6_MASK 6 -#define R600_CB6_SIZE 7 -#define R600_CB6_PM4 128 - -/* R600_CB7 */ -#define R600_CB7__CB_COLOR7_BASE 0 -#define R600_CB7__CB_COLOR7_INFO 1 -#define R600_CB7__CB_COLOR7_SIZE 2 -#define R600_CB7__CB_COLOR7_VIEW 3 -#define R600_CB7__CB_COLOR7_FRAG 4 -#define R600_CB7__CB_COLOR7_TILE 5 -#define R600_CB7__CB_COLOR7_MASK 6 -#define R600_CB7_SIZE 7 -#define R600_CB7_PM4 128 - -/* R600_DB */ -#define R600_DB__DB_DEPTH_BASE 0 -#define R600_DB__DB_DEPTH_SIZE 1 -#define R600_DB__DB_DEPTH_VIEW 2 -#define R600_DB__DB_DEPTH_INFO 3 -#define R600_DB__DB_HTILE_SURFACE 4 -#define R600_DB__DB_PREFETCH_LIMIT 5 -#define R600_DB_SIZE 6 -#define R600_DB_PM4 128 - -/* R600_VGT */ -#define R600_VGT__VGT_PRIMITIVE_TYPE 0 -#define R600_VGT__VGT_MAX_VTX_INDX 1 -#define R600_VGT__VGT_MIN_VTX_INDX 2 -#define R600_VGT__VGT_INDX_OFFSET 3 -#define R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX 4 -#define R600_VGT__VGT_DMA_INDEX_TYPE 5 -#define R600_VGT__VGT_PRIMITIVEID_EN 6 -#define R600_VGT__VGT_DMA_NUM_INSTANCES 7 -#define R600_VGT__VGT_MULTI_PRIM_IB_RESET_EN 8 -#define R600_VGT__VGT_INSTANCE_STEP_RATE_0 9 -#define R600_VGT__VGT_INSTANCE_STEP_RATE_1 10 -#define R600_VGT_SIZE 11 -#define R600_VGT_PM4 128 - -/* R600_DRAW */ -#define R600_DRAW__VGT_NUM_INDICES 0 -#define R600_DRAW__VGT_DMA_BASE_HI 1 -#define R600_DRAW__VGT_DMA_BASE 2 -#define R600_DRAW__VGT_DRAW_INITIATOR 3 -#define R600_DRAW_SIZE 4 -#define R600_DRAW_PM4 128 - -/* R600_VGT_EVENT */ -#define R600_VGT_EVENT__VGT_EVENT_INITIATOR 0 -#define R600_VGT_EVENT_SIZE 1 -#define R600_VGT_EVENT_PM4 128 - -/* R600_CB_FLUSH */ -#define R600_CB_FLUSH_SIZE 0 -#define R600_CB_FLUSH_PM4 128 - -/* R600_DB_FLUSH */ -#define R600_DB_FLUSH_SIZE 0 -#define R600_DB_FLUSH_PM4 128 - diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index d4d9b07c0e..dc351bfb62 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -27,6 +27,7 @@ #include <errno.h> #include <pipe/p_screen.h> #include <util/u_format.h> +#include <util/u_format_s3tc.h> #include <util/u_math.h> #include <util/u_inlines.h> #include <util/u_memory.h> @@ -38,8 +39,6 @@ #include "r600d.h" #include "r600_formats.h" -extern struct u_resource_vtbl r600_texture_vtbl; - /* Copy from a full GPU texture to a transfer's staging one. */ static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer) { @@ -69,7 +68,7 @@ static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600 rtransfer->staging_texture, 0, &sbox); - ctx->flush(ctx, 0, NULL); + ctx->flush(ctx, NULL); } unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, @@ -77,17 +76,15 @@ unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, { unsigned offset = rtex->offset[level]; - switch (rtex->resource.base.b.target) { + switch (rtex->resource.b.b.b.target) { case PIPE_TEXTURE_3D: case PIPE_TEXTURE_CUBE: - return offset + layer * rtex->layer_size[level]; default: - assert(layer == 0); - return offset; + return offset + layer * rtex->layer_size[level]; } } -static unsigned r600_get_pixel_alignment(struct pipe_screen *screen, +static unsigned r600_get_block_alignment(struct pipe_screen *screen, enum pipe_format format, unsigned array_mode) { @@ -105,6 +102,9 @@ static unsigned r600_get_pixel_alignment(struct pipe_screen *screen, (((rscreen->tiling_info->group_bytes / 8 / pixsize)) * rscreen->tiling_info->num_banks)) * 8; break; + case V_038000_ARRAY_LINEAR_ALIGNED: + p_align = MAX2(64, rscreen->tiling_info->group_bytes / pixsize); + break; case V_038000_ARRAY_LINEAR_GENERAL: default: p_align = rscreen->tiling_info->group_bytes / pixsize; @@ -124,8 +124,10 @@ static unsigned r600_get_height_alignment(struct pipe_screen *screen, h_align = rscreen->tiling_info->num_channels * 8; break; case V_038000_ARRAY_1D_TILED_THIN1: + case V_038000_ARRAY_LINEAR_ALIGNED: h_align = 8; break; + case V_038000_ARRAY_LINEAR_GENERAL: default: h_align = 1; break; @@ -139,7 +141,7 @@ static unsigned r600_get_base_alignment(struct pipe_screen *screen, { struct r600_screen* rscreen = (struct r600_screen *)screen; unsigned pixsize = util_format_get_blocksize(format); - int p_align = r600_get_pixel_alignment(screen, format, array_mode); + int p_align = r600_get_block_alignment(screen, format, array_mode); int h_align = r600_get_height_alignment(screen, array_mode); int b_align; @@ -149,6 +151,8 @@ static unsigned r600_get_base_alignment(struct pipe_screen *screen, p_align * pixsize * h_align); break; case V_038000_ARRAY_1D_TILED_THIN1: + case V_038000_ARRAY_LINEAR_ALIGNED: + case V_038000_ARRAY_LINEAR_GENERAL: default: b_align = rscreen->tiling_info->group_bytes; break; @@ -165,55 +169,46 @@ static unsigned mip_minify(unsigned size, unsigned level) return val; } -static unsigned r600_texture_get_stride(struct pipe_screen *screen, - struct r600_resource_texture *rtex, - unsigned level) +static unsigned r600_texture_get_nblocksx(struct pipe_screen *screen, + struct r600_resource_texture *rtex, + unsigned level) { - struct pipe_resource *ptex = &rtex->resource.base.b; - unsigned width, stride, tile_width; + struct pipe_resource *ptex = &rtex->resource.b.b.b; + unsigned nblocksx, block_align, width; + unsigned blocksize = util_format_get_blocksize(ptex->format); if (rtex->pitch_override) - return rtex->pitch_override; + return rtex->pitch_override / blocksize; width = mip_minify(ptex->width0, level); - if (util_format_is_plain(ptex->format)) { - tile_width = r600_get_pixel_alignment(screen, ptex->format, - rtex->array_mode[level]); - width = align(width, tile_width); - } - stride = util_format_get_stride(ptex->format, width); + nblocksx = util_format_get_nblocksx(ptex->format, width); - return stride; + block_align = r600_get_block_alignment(screen, ptex->format, + rtex->array_mode[level]); + nblocksx = align(nblocksx, block_align); + return nblocksx; } static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen, struct r600_resource_texture *rtex, unsigned level) { - struct pipe_resource *ptex = &rtex->resource.base.b; + struct pipe_resource *ptex = &rtex->resource.b.b.b; unsigned height, tile_height; height = mip_minify(ptex->height0, level); - if (util_format_is_plain(ptex->format)) { - tile_height = r600_get_height_alignment(screen, - rtex->array_mode[level]); - height = align(height, tile_height); - } - return util_format_get_nblocksy(ptex->format, height); -} - -/* Get a width in pixels from a stride in bytes. */ -static unsigned pitch_to_width(enum pipe_format format, unsigned pitch_in_bytes) -{ - return (pitch_in_bytes / util_format_get_blocksize(format)) * - util_format_get_blockwidth(format); + height = util_format_get_nblocksy(ptex->format, height); + tile_height = r600_get_height_alignment(screen, + rtex->array_mode[level]); + height = align(height, tile_height); + return height; } static void r600_texture_set_array_mode(struct pipe_screen *screen, struct r600_resource_texture *rtex, unsigned level, unsigned array_mode) { - struct pipe_resource *ptex = &rtex->resource.base.b; + struct pipe_resource *ptex = &rtex->resource.b.b.b; switch (array_mode) { case V_0280A0_ARRAY_LINEAR_GENERAL: @@ -227,11 +222,11 @@ static void r600_texture_set_array_mode(struct pipe_screen *screen, unsigned w, h, tile_height, tile_width; tile_height = r600_get_height_alignment(screen, array_mode); - tile_width = r600_get_pixel_alignment(screen, ptex->format, array_mode); + tile_width = r600_get_block_alignment(screen, ptex->format, array_mode); w = mip_minify(ptex->width0, level); h = mip_minify(ptex->height0, level); - if (w < tile_width || h < tile_height) + if (w <= tile_width || h <= tile_height) rtex->array_mode[level] = V_0280A0_ARRAY_1D_TILED_THIN1; else rtex->array_mode[level] = array_mode; @@ -244,40 +239,119 @@ static void r600_setup_miptree(struct pipe_screen *screen, struct r600_resource_texture *rtex, unsigned array_mode) { - struct pipe_resource *ptex = &rtex->resource.base.b; + struct pipe_resource *ptex = &rtex->resource.b.b.b; struct radeon *radeon = (struct radeon *)screen->winsys; enum chip_class chipc = r600_get_family_class(radeon); - unsigned pitch, size, layer_size, i, offset; - unsigned nblocksy; + unsigned size, layer_size, i, offset; + unsigned nblocksx, nblocksy; for (i = 0, offset = 0; i <= ptex->last_level; i++) { + unsigned blocksize = util_format_get_blocksize(ptex->format); + r600_texture_set_array_mode(screen, rtex, i, array_mode); - pitch = r600_texture_get_stride(screen, rtex, i); + nblocksx = r600_texture_get_nblocksx(screen, rtex, i); nblocksy = r600_texture_get_nblocksy(screen, rtex, i); - layer_size = pitch * nblocksy; - + layer_size = nblocksx * nblocksy * blocksize; if (ptex->target == PIPE_TEXTURE_CUBE) { if (chipc >= R700) size = layer_size * 8; else size = layer_size * 6; } - else + else if (ptex->target == PIPE_TEXTURE_3D) size = layer_size * u_minify(ptex->depth0, i); + else + size = layer_size * ptex->array_size; + /* align base image and start of miptree */ if ((i == 0) || (i == 1)) offset = align(offset, r600_get_base_alignment(screen, ptex->format, array_mode)); rtex->offset[i] = offset; rtex->layer_size[i] = layer_size; - rtex->pitch_in_bytes[i] = pitch; - rtex->pitch_in_pixels[i] = pitch_to_width(ptex->format, pitch); + rtex->pitch_in_blocks[i] = nblocksx; /* CB talks in elements */ + rtex->pitch_in_bytes[i] = nblocksx * blocksize; + offset += size; } rtex->size = offset; } +/* Figure out whether u_blitter will fallback to a transfer operation. + * If so, don't use a staging resource. + */ +static boolean permit_hardware_blit(struct pipe_screen *screen, + const struct pipe_resource *res) +{ + unsigned bind; + + if (util_format_is_depth_or_stencil(res->format)) + bind = PIPE_BIND_DEPTH_STENCIL; + else + bind = PIPE_BIND_RENDER_TARGET; + + /* hackaround for S3TC */ + if (util_format_is_compressed(res->format)) + return TRUE; + + if (!screen->is_format_supported(screen, + res->format, + res->target, + res->nr_samples, + bind)) + return FALSE; + + if (!screen->is_format_supported(screen, + res->format, + res->target, + res->nr_samples, + PIPE_BIND_SAMPLER_VIEW)) + return FALSE; + + return TRUE; +} + +static boolean r600_texture_get_handle(struct pipe_screen* screen, + struct pipe_resource *ptex, + struct winsys_handle *whandle) +{ + struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; + struct r600_resource *resource = &rtex->resource; + struct radeon *radeon = (struct radeon *)screen->winsys; + + return r600_bo_get_winsys_handle(radeon, resource->bo, + rtex->pitch_in_bytes[0], whandle); +} + +static void r600_texture_destroy(struct pipe_screen *screen, + struct pipe_resource *ptex) +{ + struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; + struct r600_resource *resource = &rtex->resource; + struct radeon *radeon = (struct radeon *)screen->winsys; + + if (rtex->flushed_depth_texture) + pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); + + if (resource->bo) { + r600_bo_reference(radeon, &resource->bo, NULL); + } + FREE(rtex); +} + +static const struct u_resource_vtbl r600_texture_vtbl = +{ + r600_texture_get_handle, /* get_handle */ + r600_texture_destroy, /* resource_destroy */ + r600_texture_get_transfer, /* get_transfer */ + r600_texture_transfer_destroy, /* transfer_destroy */ + r600_texture_transfer_map, /* transfer_map */ + u_default_transfer_flush_region,/* transfer_flush_region */ + r600_texture_transfer_unmap, /* transfer_unmap */ + u_default_transfer_inline_write /* transfer_inline_write */ +}; + static struct r600_resource_texture * r600_texture_create_object(struct pipe_screen *screen, const struct pipe_resource *base, @@ -295,21 +369,22 @@ r600_texture_create_object(struct pipe_screen *screen, return NULL; resource = &rtex->resource; - resource->base.b = *base; - resource->base.vtbl = &r600_texture_vtbl; - pipe_reference_init(&resource->base.b.reference, 1); - resource->base.b.screen = screen; + resource->b.b.b = *base; + resource->b.b.vtbl = &r600_texture_vtbl; + pipe_reference_init(&resource->b.b.b.reference, 1); + resource->b.b.b.screen = screen; resource->bo = bo; rtex->pitch_override = pitch_in_bytes_override; + /* only mark depth textures the HW can hit as depth textures */ + if (util_format_is_depth_or_stencil(base->format) && permit_hardware_blit(screen, base)) + rtex->depth = 1; - if (array_mode) - rtex->tiled = 1; r600_setup_miptree(screen, rtex, array_mode); resource->size = rtex->size; if (!resource->bo) { - struct pipe_resource *ptex = &rtex->resource.base.b; + struct pipe_resource *ptex = &rtex->resource.b.b.b; int base_align = r600_get_base_alignment(screen, ptex->format, array_mode); resource->bo = r600_bo(radeon, rtex->size, base_align, base->bind, base->usage); @@ -329,56 +404,38 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, /* Would like some magic "get_bool_option_once" routine. */ - if (force_tiling == -1) - force_tiling = debug_get_bool_option("R600_FORCE_TILING", FALSE); + if (force_tiling == -1) { +#if 0 + /* reenable when 2D tiling is fixed better */ + struct r600_screen *rscreen = (struct r600_screen *)screen; + if (r600_get_minor_version(rscreen->radeon) >= 9) + force_tiling = debug_get_bool_option("R600_TILING", TRUE); +#endif + force_tiling = debug_get_bool_option("R600_TILING", FALSE); + } - if (force_tiling) { + if (force_tiling && permit_hardware_blit(screen, templ)) { if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) && !(templ->bind & PIPE_BIND_SCANOUT)) { array_mode = V_038000_ARRAY_2D_TILED_THIN1; } } + if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) && + util_format_is_compressed(templ->format)) + array_mode = V_038000_ARRAY_1D_TILED_THIN1; + return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode, 0, 0, NULL); } -static void r600_texture_destroy(struct pipe_screen *screen, - struct pipe_resource *ptex) -{ - struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; - struct r600_resource *resource = &rtex->resource; - struct radeon *radeon = (struct radeon *)screen->winsys; - - if (rtex->flushed_depth_texture) - pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); - - if (resource->bo) { - r600_bo_reference(radeon, &resource->bo, NULL); - } - FREE(rtex); -} - -static boolean r600_texture_get_handle(struct pipe_screen* screen, - struct pipe_resource *ptex, - struct winsys_handle *whandle) -{ - struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; - struct r600_resource *resource = &rtex->resource; - struct radeon *radeon = (struct radeon *)screen->winsys; - - return r600_bo_get_winsys_handle(radeon, resource->bo, - rtex->pitch_in_bytes[0], whandle); -} - static struct pipe_surface *r600_create_surface(struct pipe_context *pipe, struct pipe_resource *texture, const struct pipe_surface *surf_tmpl) { struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; struct r600_surface *surface = CALLOC_STRUCT(r600_surface); - unsigned tile_height; unsigned level = surf_tmpl->u.tex.level; assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); @@ -398,8 +455,8 @@ static struct pipe_surface *r600_create_surface(struct pipe_context *pipe, surface->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer; surface->base.u.tex.level = level; - tile_height = r600_get_height_alignment(pipe->screen, rtex->array_mode[level]); - surface->aligned_height = align(surface->base.height, tile_height); + surface->aligned_height = r600_texture_get_nblocksy(pipe->screen, + rtex, level); return &surface->base; } @@ -435,18 +492,8 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, bo); } -static unsigned int r600_texture_is_referenced(struct pipe_context *context, - struct pipe_resource *texture, - unsigned level, int layer) -{ - /* FIXME */ - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - -int (*r600_blit_uncompress_depth_ptr)(struct pipe_context *ctx, struct r600_resource_texture *texture); - int r600_texture_depth_flush(struct pipe_context *ctx, - struct pipe_resource *texture) + struct pipe_resource *texture, boolean just_create) { struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; struct pipe_resource resource; @@ -459,7 +506,8 @@ int r600_texture_depth_flush(struct pipe_context *ctx, resource.width0 = texture->width0; resource.height0 = texture->height0; resource.depth0 = 1; - resource.last_level = 0; + resource.array_size = 1; + resource.last_level = texture->last_level; resource.nr_samples = 0; resource.usage = PIPE_USAGE_DYNAMIC; resource.bind = 0; @@ -473,10 +521,14 @@ int r600_texture_depth_flush(struct pipe_context *ctx, return -ENOMEM; } + ((struct r600_resource_texture *)rtex->flushed_depth_texture)->is_flushing_texture = TRUE; out: + if (just_create) + return 0; + /* XXX: only do this if the depth texture has actually changed: */ - r600_blit_uncompress_depth_ptr(ctx, rtex); + r600_blit_uncompress_depth(ctx, rtex); return 0; } @@ -487,46 +539,6 @@ static INLINE unsigned u_box_volume( const struct pipe_box *box ) return box->width * box->depth * box->height; }; - -/* Figure out whether u_blitter will fallback to a transfer operation. - * If so, don't use a staging resource. - */ -static boolean permit_hardware_blit(struct pipe_screen *screen, - struct pipe_resource *res) -{ - unsigned bind; - - if (util_format_is_depth_or_stencil(res->format)) - bind = PIPE_BIND_DEPTH_STENCIL; - else - bind = PIPE_BIND_RENDER_TARGET; - - /* See r600_resource_copy_region: there is something wrong - * with depth resource copies at the moment so avoid them for - * now. - */ - if (util_format_get_component_bits(res->format, - UTIL_FORMAT_COLORSPACE_ZS, - 0) != 0) - return FALSE; - - if (!screen->is_format_supported(screen, - res->format, - res->target, - res->nr_samples, - bind, 0)) - return FALSE; - - if (!screen->is_format_supported(screen, - res->format, - res->target, - res->nr_samples, - PIPE_BIND_SAMPLER_VIEW, 0)) - return FALSE; - - return TRUE; -} - struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, struct pipe_resource *texture, unsigned level, @@ -546,7 +558,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, * the CPU is much happier reading out of cached system memory * than uncached VRAM. */ - if (rtex->tiled) + if (R600_TEX_IS_TILED(rtex, level)) use_staging_texture = TRUE; if ((usage & PIPE_TRANSFER_READ) && u_box_volume(box) > 1024) @@ -579,13 +591,16 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, */ /* XXX: when discard is true, no need to read back from depth texture */ - r = r600_texture_depth_flush(ctx, texture); + r = r600_texture_depth_flush(ctx, texture, FALSE); if (r < 0) { R600_ERR("failed to create temporary texture to hold untiled copy\n"); pipe_resource_reference(&trans->transfer.resource, NULL); FREE(trans); return NULL; } + trans->transfer.stride = rtex->flushed_depth_texture->pitch_in_bytes[level]; + trans->offset = r600_texture_get_offset(rtex->flushed_depth_texture, level, box->z); + return &trans->transfer; } else if (use_staging_texture) { resource.target = PIPE_TEXTURE_2D; resource.format = texture->format; @@ -622,11 +637,12 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, if (usage & PIPE_TRANSFER_READ) { r600_copy_to_staging_texture(ctx, trans); /* Always referenced in the blit. */ - ctx->flush(ctx, 0, NULL); + ctx->flush(ctx, NULL); } return &trans->transfer; } trans->transfer.stride = rtex->pitch_in_bytes[level]; + trans->transfer.layer_stride = rtex->layer_size[level]; trans->offset = r600_texture_get_offset(rtex, level, box->z); return &trans->transfer; } @@ -635,7 +651,8 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx, struct pipe_transfer *transfer) { struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; - struct r600_resource_texture *rtex = (struct r600_resource_texture*)transfer->resource; + struct pipe_resource *texture = transfer->resource; + struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; if (rtransfer->staging_texture) { if (transfer->usage & PIPE_TRANSFER_WRITE) { @@ -643,9 +660,12 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx, } pipe_resource_reference(&rtransfer->staging_texture, NULL); } - if (rtex->flushed_depth_texture) { - pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); + + if (rtex->depth && !rtex->is_flushing_texture) { + if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtex->flushed_depth_texture) + r600_blit_push_depth(ctx, rtex); } + pipe_resource_reference(&transfer->resource, NULL); FREE(transfer); } @@ -727,19 +747,6 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx, r600_bo_unmap(radeon, bo); } -struct u_resource_vtbl r600_texture_vtbl = -{ - r600_texture_get_handle, /* get_handle */ - r600_texture_destroy, /* resource_destroy */ - r600_texture_is_referenced, /* is_resource_referenced */ - r600_texture_get_transfer, /* get_transfer */ - r600_texture_transfer_destroy, /* transfer_destroy */ - r600_texture_transfer_map, /* transfer_map */ - u_default_transfer_flush_region,/* transfer_flush_region */ - r600_texture_transfer_unmap, /* transfer_unmap */ - u_default_transfer_inline_write /* transfer_inline_write */ -}; - void r600_init_surface_functions(struct r600_pipe_context *r600) { r600->context.create_surface = r600_create_surface; @@ -795,13 +802,16 @@ static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format, } /* texture format translate */ -uint32_t r600_translate_texformat(enum pipe_format format, - const unsigned char *swizzle_view, +uint32_t r600_translate_texformat(struct pipe_screen *screen, + enum pipe_format format, + const unsigned char *swizzle_view, uint32_t *word4_p, uint32_t *yuv_format_p) { uint32_t result = 0, word4 = 0, yuv_format = 0; const struct util_format_description *desc; boolean uniform = TRUE; + static int r600_enable_s3tc = -1; + int i; const uint32_t sign_bit[4] = { S_038010_FORMAT_COMP_X(V_038010_SQ_FORMAT_COMP_SIGNED), @@ -850,37 +860,65 @@ uint32_t r600_translate_texformat(enum pipe_format format, break; } goto out_unknown; /* TODO */ - + case UTIL_FORMAT_COLORSPACE_SRGB: word4 |= S_038010_FORCE_DEGAMMA(1); - if (format == PIPE_FORMAT_L8A8_SRGB || format == PIPE_FORMAT_L8_SRGB) - goto out_unknown; /* fails for some reason - TODO */ break; default: break; } - /* S3TC formats. TODO */ - if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { - static int r600_enable_s3tc = -1; + if (r600_enable_s3tc == -1) { + struct r600_screen *rscreen = (struct r600_screen *)screen; + if (r600_get_minor_version(rscreen->radeon) >= 9) + r600_enable_s3tc = 1; + else + r600_enable_s3tc = debug_get_bool_option("R600_ENABLE_S3TC", FALSE); + } + + if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { + if (!r600_enable_s3tc) + goto out_unknown; - if (r600_enable_s3tc == -1) - r600_enable_s3tc = - debug_get_bool_option("R600_ENABLE_S3TC", FALSE); + switch (format) { + case PIPE_FORMAT_RGTC1_SNORM: + word4 |= sign_bit[0]; + case PIPE_FORMAT_RGTC1_UNORM: + result = FMT_BC4; + goto out_word4; + case PIPE_FORMAT_RGTC2_SNORM: + word4 |= sign_bit[0] | sign_bit[1]; + case PIPE_FORMAT_RGTC2_UNORM: + result = FMT_BC5; + goto out_word4; + default: + goto out_unknown; + } + } + + if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { if (!r600_enable_s3tc) goto out_unknown; + if (!util_format_s3tc_enabled) { + goto out_unknown; + } + switch (format) { case PIPE_FORMAT_DXT1_RGB: case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT1_SRGB: + case PIPE_FORMAT_DXT1_SRGBA: result = FMT_BC1; goto out_word4; case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT3_SRGBA: result = FMT_BC2; goto out_word4; case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_DXT5_SRGBA: result = FMT_BC3; goto out_word4; default: @@ -889,7 +927,7 @@ uint32_t r600_translate_texformat(enum pipe_format format, } - for (i = 0; i < desc->nr_channels; i++) { + for (i = 0; i < desc->nr_channels; i++) { if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { word4 |= sign_bit[i]; } @@ -897,13 +935,11 @@ uint32_t r600_translate_texformat(enum pipe_format format, /* R8G8Bx_SNORM - TODO CxV8U8 */ - /* RGTC - TODO */ - /* See whether the components are of the same size. */ for (i = 1; i < desc->nr_channels; i++) { uniform = uniform && desc->channel[0].size == desc->channel[i].size; } - + /* Non-uniform formats. */ if (!uniform) { switch(desc->nr_channels) { @@ -927,7 +963,7 @@ uint32_t r600_translate_texformat(enum pipe_format format, desc->channel[1].size == 10 && desc->channel[2].size == 10 && desc->channel[3].size == 2) { - result = FMT_10_10_10_2; + result = FMT_2_10_10_10; goto out_word4; } goto out_unknown; @@ -990,6 +1026,19 @@ uint32_t r600_translate_texformat(enum pipe_format format, result = FMT_16_16_16_16; goto out_word4; } + goto out_unknown; + case 32: + switch (desc->nr_channels) { + case 1: + result = FMT_32; + goto out_word4; + case 2: + result = FMT_32_32; + goto out_word4; + case 4: + result = FMT_32_32_32_32; + goto out_word4; + } } goto out_unknown; @@ -1021,7 +1070,7 @@ uint32_t r600_translate_texformat(enum pipe_format format, goto out_word4; } } - + } out_word4: if (word4_p) diff --git a/src/gallium/drivers/r600/r600_translate.c b/src/gallium/drivers/r600/r600_translate.c index 1c227d3215..7482d15e12 100644 --- a/src/gallium/drivers/r600/r600_translate.c +++ b/src/gallium/drivers/r600/r600_translate.c @@ -22,181 +22,34 @@ * * Authors: Dave Airlie <airlied@redhat.com> */ -#include "translate/translate_cache.h" -#include "translate/translate.h" -#include <pipebuffer/pb_buffer.h> + #include <util/u_index_modify.h> +#include "util/u_inlines.h" +#include "util/u_upload_mgr.h" #include "r600_pipe.h" -void r600_begin_vertex_translate(struct r600_pipe_context *rctx) -{ - struct pipe_context *pipe = &rctx->context; - struct translate_key key = {0}; - struct translate_element *te; - unsigned tr_elem_index[PIPE_MAX_ATTRIBS] = {0}; - struct translate *tr; - struct r600_vertex_element *ve = rctx->vertex_elements; - boolean vb_translated[PIPE_MAX_ATTRIBS] = {0}; - void *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map; - struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer; - struct pipe_resource *out_buffer; - unsigned i, num_verts; - struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS]; - - /* Initialize the translate key, i.e. the recipe how vertices should be - * translated. */ - for (i = 0; i < ve->count; i++) { - struct pipe_vertex_buffer *vb = - &rctx->vertex_buffer[ve->elements[i].vertex_buffer_index]; - enum pipe_format output_format = ve->hw_format[i]; - unsigned output_format_size = ve->hw_format_size[i]; - - /* Check for support. */ - if (ve->elements[i].src_format == ve->hw_format[i]) { - continue; - } - - /* Workaround for translate: output floats instead of halfs. */ - switch (output_format) { - case PIPE_FORMAT_R16_FLOAT: - output_format = PIPE_FORMAT_R32_FLOAT; - output_format_size = 4; - break; - case PIPE_FORMAT_R16G16_FLOAT: - output_format = PIPE_FORMAT_R32G32_FLOAT; - output_format_size = 8; - break; - case PIPE_FORMAT_R16G16B16_FLOAT: - output_format = PIPE_FORMAT_R32G32B32_FLOAT; - output_format_size = 12; - break; - case PIPE_FORMAT_R16G16B16A16_FLOAT: - output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - output_format_size = 16; - break; - default:; - } - - /* Add this vertex element. */ - te = &key.element[key.nr_elements]; - /*te->type; - te->instance_divisor;*/ - te->input_buffer = ve->elements[i].vertex_buffer_index; - te->input_format = ve->elements[i].src_format; - te->input_offset = vb->buffer_offset + ve->elements[i].src_offset; - te->output_format = output_format; - te->output_offset = key.output_stride; - - key.output_stride += output_format_size; - vb_translated[ve->elements[i].vertex_buffer_index] = TRUE; - tr_elem_index[i] = key.nr_elements; - key.nr_elements++; - } - - /* Get a translate object. */ - tr = translate_cache_find(rctx->tran.translate_cache, &key); - - /* Map buffers we want to translate. */ - for (i = 0; i < rctx->nvertex_buffer; i++) { - if (vb_translated[i]) { - struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; - - vb_map[i] = pipe_buffer_map(pipe, vb->buffer, - PIPE_TRANSFER_READ, &vb_transfer[i]); - - tr->set_buffer(tr, i, vb_map[i], vb->stride, vb->max_index); - } - } - - /* Create and map the output buffer. */ - num_verts = rctx->vb_max_index + 1; - - out_buffer = pipe_buffer_create(&rctx->screen->screen, - PIPE_BIND_VERTEX_BUFFER, - key.output_stride * num_verts); - - out_map = pipe_buffer_map(pipe, out_buffer, PIPE_TRANSFER_WRITE, - &out_transfer); - - /* Translate. */ - tr->run(tr, 0, num_verts, 0, out_map); - - /* Unmap all buffers. */ - for (i = 0; i < rctx->nvertex_buffer; i++) { - if (vb_translated[i]) { - pipe_buffer_unmap(pipe, rctx->vertex_buffer[i].buffer, - vb_transfer[i]); - } - } - - pipe_buffer_unmap(pipe, out_buffer, out_transfer); - - /* Setup the new vertex buffer in the first free slot. */ - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; - - if (!vb->buffer) { - pipe_resource_reference(&vb->buffer, out_buffer); - vb->buffer_offset = 0; - vb->max_index = num_verts - 1; - vb->stride = key.output_stride; - rctx->tran.vb_slot = i; - break; - } - } - - /* Save and replace vertex elements. */ - for (i = 0; i < ve->count; i++) { - if (vb_translated[ve->elements[i].vertex_buffer_index]) { - te = &key.element[tr_elem_index[i]]; - new_velems[i].instance_divisor = ve->elements[i].instance_divisor; - new_velems[i].src_format = te->output_format; - new_velems[i].src_offset = te->output_offset; - new_velems[i].vertex_buffer_index = rctx->tran.vb_slot; - } else { - memcpy(&new_velems[i], &ve->elements[i], - sizeof(struct pipe_vertex_element)); - } - } - - rctx->tran.new_velems = pipe->create_vertex_elements_state(pipe, ve->count, new_velems); - pipe->bind_vertex_elements_state(pipe, rctx->tran.new_velems); - - pipe_resource_reference(&out_buffer, NULL); -} - -void r600_end_vertex_translate(struct r600_pipe_context *rctx) -{ - struct pipe_context *pipe = &rctx->context; - - if (rctx->tran.new_velems == NULL) { - return; - } - /* Restore vertex elements. */ - if (rctx->vertex_elements == rctx->tran.new_velems) { - pipe->bind_vertex_elements_state(pipe, NULL); - } - pipe->delete_vertex_elements_state(pipe, rctx->tran.new_velems); - rctx->tran.new_velems = NULL; - - /* Delete the now-unused VBO. */ - pipe_resource_reference(&rctx->vertex_buffer[rctx->tran.vb_slot].buffer, - NULL); -} void r600_translate_index_buffer(struct r600_pipe_context *r600, - struct pipe_resource **index_buffer, - unsigned *index_size, - unsigned *start, unsigned count) + struct pipe_resource **index_buffer, + unsigned *index_size, + unsigned *start, unsigned count) { + struct pipe_resource *out_buffer = NULL; + unsigned out_offset; + void *ptr; + boolean flushed; + switch (*index_size) { case 1: - util_shorten_ubyte_elts(&r600->context, index_buffer, 0, *start, count); + u_upload_alloc(r600->vbuf_mgr->uploader, 0, count * 2, + &out_offset, &out_buffer, &flushed, &ptr); + + util_shorten_ubyte_elts_to_userptr( + &r600->context, *index_buffer, 0, *start, count, ptr); + + pipe_resource_reference(index_buffer, out_buffer); *index_size = 2; - *start = 0; - break; - case 2: - case 4: + *start = out_offset / 2; break; } } diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index ae19bfb828..df70e2889e 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -67,6 +67,10 @@ #define PKT3_SET_CTL_CONST 0x6F #define PKT3_SURFACE_BASE_UPDATE 0x73 +#define PREDICATION_OP_CLEAR 0x0 +#define PREDICATION_OP_ZPASS 0x1 +#define PREDICATION_OP_PRIMCOUNT 0x2 + #define PKT_TYPE_S(x) (((x) & 0x3) << 30) #define PKT_TYPE_G(x) (((x) >> 30) & 0x3) #define PKT_TYPE_C 0x3FFFFFFF @@ -248,6 +252,8 @@ #define S_0280A0_SOURCE_FORMAT(x) (((x) & 0x1) << 27) #define G_0280A0_SOURCE_FORMAT(x) (((x) >> 27) & 0x1) #define C_0280A0_SOURCE_FORMAT 0xF7FFFFFF +#define V_0280A0_EXPORT_FULL 0 +#define V_0280A0_EXPORT_NORM 1 #define R_028060_CB_COLOR0_SIZE 0x028060 #define S_028060_PITCH_TILE_MAX(x) (((x) & 0x3FF) << 0) #define G_028060_PITCH_TILE_MAX(x) (((x) >> 0) & 0x3FF) @@ -960,8 +966,8 @@ #define S_038010_SRF_MODE_ALL(x) (((x) & 0x1) << 10) #define G_038010_SRF_MODE_ALL(x) (((x) >> 10) & 0x1) #define C_038010_SRF_MODE_ALL 0xFFFFFBFF -#define V_038010_SFR_MODE_ZERO_CLAMP_MINUS_ONE 0x00000000 -#define V_038010_SFR_MODE_NO_ZERO 0x00000001 +#define V_038010_SRF_MODE_ZERO_CLAMP_MINUS_ONE 0x00000000 +#define V_038010_SRF_MODE_NO_ZERO 0x00000001 #define S_038010_FORCE_DEGAMMA(x) (((x) & 0x1) << 11) #define G_038010_FORCE_DEGAMMA(x) (((x) >> 11) & 0x1) #define C_038010_FORCE_DEGAMMA 0xFFFFF7FF @@ -2332,31 +2338,6 @@ #define R_0280D4_CB_COLOR5_TILE 0x0280D4 #define R_0280D8_CB_COLOR6_TILE 0x0280D8 #define R_0280DC_CB_COLOR7_TILE 0x0280DC -#define R_028808_CB_COLOR_CONTROL 0x028808 -#define S_028808_FOG_ENABLE(x) (((x) & 0x1) << 0) -#define G_028808_FOG_ENABLE(x) (((x) >> 0) & 0x1) -#define C_028808_FOG_ENABLE 0xFFFFFFFE -#define S_028808_MULTIWRITE_ENABLE(x) (((x) & 0x1) << 1) -#define G_028808_MULTIWRITE_ENABLE(x) (((x) >> 1) & 0x1) -#define C_028808_MULTIWRITE_ENABLE 0xFFFFFFFD -#define S_028808_DITHER_ENABLE(x) (((x) & 0x1) << 2) -#define G_028808_DITHER_ENABLE(x) (((x) >> 2) & 0x1) -#define C_028808_DITHER_ENABLE 0xFFFFFFFB -#define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3) -#define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1) -#define C_028808_DEGAMMA_ENABLE 0xFFFFFFF7 -#define S_028808_SPECIAL_OP(x) (((x) & 0x7) << 4) -#define G_028808_SPECIAL_OP(x) (((x) >> 4) & 0x7) -#define C_028808_SPECIAL_OP 0xFFFFFF8F -#define S_028808_PER_MRT_BLEND(x) (((x) & 0x1) << 7) -#define G_028808_PER_MRT_BLEND(x) (((x) >> 7) & 0x1) -#define C_028808_PER_MRT_BLEND 0xFFFFFF7F -#define S_028808_TARGET_BLEND_ENABLE(x) (((x) & 0xFF) << 8) -#define G_028808_TARGET_BLEND_ENABLE(x) (((x) >> 8) & 0xFF) -#define C_028808_TARGET_BLEND_ENABLE 0xFFFF00FF -#define S_028808_ROP3(x) (((x) & 0xFF) << 16) -#define G_028808_ROP3(x) (((x) >> 16) & 0xFF) -#define C_028808_ROP3 0xFF00FFFF #define R_028614_SPI_VS_OUT_ID_0 0x028614 #define S_028614_SEMANTIC_0(x) (((x) & 0xFF) << 0) #define G_028614_SEMANTIC_0(x) (((x) >> 0) & 0xFF) diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c index 892dee86ba..b3c7d1494f 100644 --- a/src/gallium/drivers/r600/r700_asm.c +++ b/src/gallium/drivers/r600/r700_asm.c @@ -26,11 +26,18 @@ #include "r600_asm.h" #include "r700_sq.h" +void r700_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf) +{ + unsigned count = (cf->ndw / 4) - 1; + *bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); + *bytecode++ = S_SQ_CF_WORD1_CF_INST(cf->inst) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT(count) | + S_SQ_CF_WORD1_COUNT_3(count >> 3); +} int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) { - unsigned i; - bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) | S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | @@ -61,18 +68,11 @@ int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | + S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) | S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) | S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) | S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) | S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate); } - if (alu->last) { - if (alu->nliteral && !alu->literal_added) { - R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst); - } - for (i = 0; i < alu->nliteral; i++) { - bc->bytecode[id++] = alu->value[i]; - } - } return 0; } |