summary | refs | log | tree | commit | diff
path: root/src/gallium/drivers/r600
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/r600')
-rw-r--r-- src/gallium/drivers/r600/Android.mk 43
-rw-r--r-- src/gallium/drivers/r600/SConscript 2
-rw-r--r-- src/gallium/drivers/r600/eg_asm.c 51
-rw-r--r-- src/gallium/drivers/r600/eg_state_inlines.h 176
-rw-r--r-- src/gallium/drivers/r600/eg_states_inc.h 458
-rw-r--r-- src/gallium/drivers/r600/evergreen_state.c 520
-rw-r--r-- src/gallium/drivers/r600/evergreend.h 22
-rw-r--r-- src/gallium/drivers/r600/r600.h 25
-rw-r--r-- src/gallium/drivers/r600/r600_asm.c 1833
-rw-r--r-- src/gallium/drivers/r600/r600_asm.h 33
-rw-r--r-- src/gallium/drivers/r600/r600_blit.c 160
-rw-r--r-- src/gallium/drivers/r600/r600_buffer.c 343
-rw-r--r-- src/gallium/drivers/r600/r600_opcodes.h 4
-rw-r--r-- src/gallium/drivers/r600/r600_pipe.c 148
-rw-r--r-- src/gallium/drivers/r600/r600_pipe.h 92
-rw-r--r-- src/gallium/drivers/r600/r600_query.c 30
-rw-r--r-- src/gallium/drivers/r600/r600_resource.c 1
-rw-r--r-- src/gallium/drivers/r600/r600_resource.h 70
-rw-r--r-- src/gallium/drivers/r600/r600_shader.c 1321
-rw-r--r-- src/gallium/drivers/r600/r600_shader.h 3
-rw-r--r-- src/gallium/drivers/r600/r600_sq.h 6
-rw-r--r-- src/gallium/drivers/r600/r600_state.c 594
-rw-r--r-- src/gallium/drivers/r600/r600_state_common.c 358
-rw-r--r-- src/gallium/drivers/r600/r600_state_inlines.h 188
-rw-r--r-- src/gallium/drivers/r600/r600_states_inc.h 543
-rw-r--r-- src/gallium/drivers/r600/r600_texture.c 405
-rw-r--r-- src/gallium/drivers/r600/r600_translate.c 185
-rw-r--r-- src/gallium/drivers/r600/r600d.h 35
-rw-r--r-- src/gallium/drivers/r600/r700_asm.c 20
29 files changed, 3595 insertions, 4074 deletions
diff --git a/src/gallium/drivers/r600/Android.mk b/src/gallium/drivers/r600/Android.mk
new file mode 100644
index 0000000000..b76a78810f
--- /dev/null
+++ b/src/gallium/drivers/r600/Android.mk
@@ -0,0 +1,43 @@
+ifeq ($(strip $(MESA_BUILD_R600G)),true)
+
+LOCAL_PATH := $(call my-dir)
+
+# from Makefile
+C_SOURCES = \
+ r600_asm.c \
+ r600_blit.c \
+ r600_buffer.c \
+ r600_helper.c \
+ r600_pipe.c \
+ r600_query.c \
+ r600_resource.c \
+ r600_shader.c \
+ r600_state.c \
+ r600_texture.c \
+ r700_asm.c \
+ evergreen_state.c \
+ eg_asm.c \
+ r600_translate.c \
+ r600_state_common.c
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+ $(C_SOURCES)
+
+LOCAL_CFLAGS := \
+ -std=c99 \
+ -fvisibility=hidden \
+ -Wno-sign-compare
+
+LOCAL_C_INCLUDES := \
+ external/mesa/src/gallium/include \
+ external/mesa/src/gallium/auxiliary \
+ external/drm \
+ external/drm/include/drm
+
+LOCAL_MODULE := libmesa_pipe_r600
+
+include $(BUILD_STATIC_LIBRARY)
+
+endif # MESA_BUILD_R600G
diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript
index 3fc1fa94c2..5a5fa6d65f 100644
--- a/src/gallium/drivers/r600/SConscript
+++ b/src/gallium/drivers/r600/SConscript
@@ -9,7 +9,7 @@ except OSError:
Return()
env.Append(CPPPATH = [
- '#/include',
+ '#/include',
'#/src/mesa',
])
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index b79875c7c7..3793b919dd 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -35,15 +35,17 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
switch (cf->inst) {
case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+ case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+ case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
- S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) |
- S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) |
- S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache1_bank);
+ S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) |
+ S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) |
+ S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank);
bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) |
- S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) |
- S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) |
- S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) |
+ S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) |
+ S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) |
+ S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) |
S_SQ_CF_ALU_WORD1_BARRIER(1) |
S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
break;
@@ -60,7 +62,8 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
- bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
+ bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
+ S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
@@ -90,37 +93,3 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
}
return 0;
}
-
-void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
-{
- struct r600_pipe_state *rstate;
- unsigned i = 0;
-
- if (count > 8) {
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT(8 - 1);
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT(count - 8 - 1);
- } else {
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT(count - 1);
- }
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
- S_SQ_CF_WORD1_BARRIER(1);
-
- rstate = &ve->rstate;
- rstate->id = R600_PIPE_STATE_FETCH_SHADER;
- rstate->nregs = 0;
- r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS,
- (r600_bo_offset(ve->fetch_shader)) >> 8,
- 0xFFFFFFFF, ve->fetch_shader);
-}
diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h
index ecea1db4f1..cae3888051 100644
--- a/src/gallium/drivers/r600/eg_state_inlines.h
+++ b/src/gallium/drivers/r600/eg_state_inlines.h
@@ -253,9 +253,13 @@ static inline unsigned r600_tex_dim(unsigned dim)
default:
case PIPE_TEXTURE_1D:
return V_030000_SQ_TEX_DIM_1D;
+ case PIPE_TEXTURE_1D_ARRAY:
+ return V_030000_SQ_TEX_DIM_1D_ARRAY;
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_RECT:
return V_030000_SQ_TEX_DIM_2D;
+ case PIPE_TEXTURE_2D_ARRAY:
+ return V_030000_SQ_TEX_DIM_2D_ARRAY;
case PIPE_TEXTURE_3D:
return V_030000_SQ_TEX_DIM_3D;
case PIPE_TEXTURE_CUBE:
@@ -289,10 +293,14 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
{
switch (format) {
/* 8-bit buffers. */
+ case PIPE_FORMAT_L4A4_UNORM:
+ return V_028C70_SWAP_ALT;
+
case PIPE_FORMAT_A8_UNORM:
return V_028C70_SWAP_ALT_REV;
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_L8_SRGB:
case PIPE_FORMAT_R8_UNORM:
case PIPE_FORMAT_R8_SNORM:
return V_028C70_SWAP_STD;
@@ -313,6 +321,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
return V_028C70_SWAP_STD;
case PIPE_FORMAT_L8A8_UNORM:
+ case PIPE_FORMAT_L8A8_SRGB:
return V_028C70_SWAP_ALT;
case PIPE_FORMAT_R8G8_UNORM:
return V_028C70_SWAP_STD;
@@ -352,9 +361,11 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
case PIPE_FORMAT_R10G10B10A2_UNORM:
case PIPE_FORMAT_R10G10B10X2_SNORM:
- case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
- return V_028C70_SWAP_STD_REV;
+ return V_028C70_SWAP_STD;
+
+ case PIPE_FORMAT_B10G10R10A2_UNORM:
+ return V_028C70_SWAP_ALT;
case PIPE_FORMAT_R16G16_UNORM:
return V_028C70_SWAP_STD;
@@ -362,14 +373,13 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
/* 64-bit buffers. */
case PIPE_FORMAT_R16G16B16A16_UNORM:
case PIPE_FORMAT_R16G16B16A16_SNORM:
- // return V_028C70_COLOR_16_16_16_16;
case PIPE_FORMAT_R16G16B16A16_FLOAT:
- // return V_028C70_COLOR_16_16_16_16_FLOAT;
/* 128-bit buffers. */
case PIPE_FORMAT_R32G32B32A32_FLOAT:
- // return V_028C70_COLOR_32_32_32_32_FLOAT;
- return 0;
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ return V_028C70_SWAP_STD;
default:
R600_ERR("unsupported colorswap format %d\n", format);
return ~0;
@@ -381,9 +391,13 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
{
switch (format) {
/* 8-bit buffers. */
+ case PIPE_FORMAT_L4A4_UNORM:
+ return V_028C70_COLOR_4_4;
+
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_L8_SRGB:
case PIPE_FORMAT_R8_UNORM:
case PIPE_FORMAT_R8_SNORM:
return V_028C70_COLOR_8;
@@ -404,6 +418,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
return V_028C70_COLOR_16;
case PIPE_FORMAT_L8A8_UNORM:
+ case PIPE_FORMAT_L8A8_SRGB:
case PIPE_FORMAT_R8G8_UNORM:
return V_028C70_COLOR_8_8;
@@ -430,7 +445,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
case PIPE_FORMAT_R10G10B10X2_SNORM:
case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
- return V_028C70_COLOR_10_10_10_2;
+ return V_028C70_COLOR_2_10_10_10;
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
@@ -471,6 +486,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
return V_028C70_COLOR_32_32;
/* 128-bit buffers. */
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ return V_028C70_COLOR_32_32_32_32;
case PIPE_FORMAT_R32G32B32_FLOAT:
return V_028C70_COLOR_32_32_32_FLOAT;
case PIPE_FORMAT_R32G32B32A32_FLOAT:
@@ -485,9 +503,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
}
}
-static INLINE boolean r600_is_sampler_format_supported(enum pipe_format format)
+static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
{
- return r600_translate_texformat(format, NULL, NULL, NULL) != ~0;
+ return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0;
}
static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format)
@@ -501,144 +519,4 @@ static INLINE boolean r600_is_zs_format_supported(enum pipe_format format)
return r600_translate_dbformat(format) != ~0;
}
-static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format)
-{
- return r600_translate_colorformat(format) != ~0;
-}
-
-static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format)
-{
- uint32_t result = 0;
- const struct util_format_description *desc;
- unsigned i;
-
- desc = util_format_description(format);
- if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
- goto out_unknown;
- }
-
- /* Find the first non-VOID channel. */
- for (i = 0; i < 4; i++) {
- if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
- break;
- }
- }
-
- switch (desc->channel[i].type) {
- /* Half-floats, floats, doubles */
- case UTIL_FORMAT_TYPE_FLOAT:
- switch (desc->channel[i].size) {
- case 16:
- switch (desc->nr_channels) {
- case 1:
- result = FMT_16_FLOAT;
- break;
- case 2:
- result = FMT_16_16_FLOAT;
- break;
- case 3:
- result = FMT_16_16_16_FLOAT;
- break;
- case 4:
- result = FMT_16_16_16_16_FLOAT;
- break;
- }
- break;
- case 32:
- switch (desc->nr_channels) {
- case 1:
- result = FMT_32_FLOAT;
- break;
- case 2:
- result = FMT_32_32_FLOAT;
- break;
- case 3:
- result = FMT_32_32_32_FLOAT;
- break;
- case 4:
- result = FMT_32_32_32_32_FLOAT;
- break;
- }
- break;
- default:
- goto out_unknown;
- }
- break;
- /* Unsigned ints */
- case UTIL_FORMAT_TYPE_UNSIGNED:
- /* Signed ints */
- case UTIL_FORMAT_TYPE_SIGNED:
- switch (desc->channel[i].size) {
- case 8:
- switch (desc->nr_channels) {
- case 1:
- result = FMT_8;
- break;
- case 2:
- result = FMT_8_8;
- break;
- case 3:
-// result = V_038008_FMT_8_8_8; /* fails piglit draw-vertices test */
-// break;
- case 4:
- result = FMT_8_8_8_8;
- break;
- }
- break;
- case 16:
- switch (desc->nr_channels) {
- case 1:
- result = FMT_16;
- break;
- case 2:
- result = FMT_16_16;
- break;
- case 3:
-// result = V_038008_FMT_16_16_16; /* fails piglit draw-vertices test */
-// break;
- case 4:
- result = FMT_16_16_16_16;
- break;
- }
- break;
- case 32:
- switch (desc->nr_channels) {
- case 1:
- result = FMT_32;
- break;
- case 2:
- result = FMT_32_32;
- break;
- case 3:
- result = FMT_32_32_32;
- break;
- case 4:
- result = FMT_32_32_32_32;
- break;
- }
- break;
- default:
- goto out_unknown;
- }
- break;
- default:
- goto out_unknown;
- }
-
- result = S_030008_DATA_FORMAT(result);
-
- if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
- result |= S_030008_FORMAT_COMP_ALL(1);
- }
- if (desc->channel[i].normalized) {
- result |= S_030008_NUM_FORMAT_ALL(0);
- } else {
- result |= S_030008_NUM_FORMAT_ALL(2);
- }
- return result;
-out_unknown:
- R600_ERR("unsupported vertex format %s\n", util_format_name(format));
- return ~0;
-}
-
#endif
diff --git a/src/gallium/drivers/r600/eg_states_inc.h b/src/gallium/drivers/r600/eg_states_inc.h
deleted file mode 100644
index 1379c11291..0000000000
--- a/src/gallium/drivers/r600/eg_states_inc.h
+++ /dev/null
@@ -1,458 +0,0 @@
-/* This file is autogenerated from eg_states.h - do not edit directly */
-/* autogenerating script is gen_eg_states.py */
-
-/* EG_CONFIG */
-#define EG_CONFIG__SQ_CONFIG 0
-#define EG_CONFIG__SPI_CONFIG_CNTL 1
-#define EG_CONFIG__SPI_CONFIG_CNTL_1 2
-#define EG_CONFIG__SQ_GPR_RESOURCE_MGMT_1 3
-#define EG_CONFIG__SQ_GPR_RESOURCE_MGMT_2 4
-#define EG_CONFIG__SQ_GPR_RESOURCE_MGMT_3 5
-#define EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_1 6
-#define EG_CONFIG__SQ_THREAD_RESOURCE_MGMT_2 7
-#define EG_CONFIG__SQ_STACK_RESOURCE_MGMT_1 8
-#define EG_CONFIG__SQ_STACK_RESOURCE_MGMT_2 9
-#define EG_CONFIG__SQ_STACK_RESOURCE_MGMT_3 10
-#define EG_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ 11
-#define EG_CONFIG__PA_CL_ENHANCE 12
-#define EG_CONFIG__SQ_DYN_GPR_RESOURCE_LIMIT_1 13
-#define EG_CONFIG__SQ_LDS_ALLOC_PS 14
-#define EG_CONFIG__SX_MISC 15
-#define EG_CONFIG__SQ_ESGS_RING_ITEMSIZE 16
-#define EG_CONFIG__SQ_GSVS_RING_ITEMSIZE 17
-#define EG_CONFIG__SQ_ESTMP_RING_ITEMSIZE 18
-#define EG_CONFIG__SQ_GSTMP_RING_ITEMSIZE 19
-#define EG_CONFIG__SQ_VSTMP_RING_ITEMSIZE 20
-#define EG_CONFIG__SQ_PSTMP_RING_ITEMSIZE 21
-#define EG_CONFIG__SQ_GS_VERT_ITEMSIZE 22
-#define EG_CONFIG__SQ_GS_VERT_ITEMSIZE_1 23
-#define EG_CONFIG__SQ_GS_VERT_ITEMSIZE_2 24
-#define EG_CONFIG__SQ_GS_VERT_ITEMSIZE_3 25
-#define EG_CONFIG__VGT_OUTPUT_PATH_CNTL 26
-#define EG_CONFIG__VGT_HOS_CNTL 27
-#define EG_CONFIG__VGT_HOS_MAX_TESS_LEVEL 28
-#define EG_CONFIG__VGT_HOS_MIN_TESS_LEVEL 29
-#define EG_CONFIG__VGT_HOS_REUSE_DEPTH 30
-#define EG_CONFIG__VGT_GROUP_PRIM_TYPE 31
-#define EG_CONFIG__VGT_GROUP_FIRST_DECR 32
-#define EG_CONFIG__VGT_GROUP_DECR 33
-#define EG_CONFIG__VGT_GROUP_VECT_0_CNTL 34
-#define EG_CONFIG__VGT_GROUP_VECT_1_CNTL 35
-#define EG_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL 36
-#define EG_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL 37
-#define EG_CONFIG__VGT_GS_MODE 38
-#define EG_CONFIG__PA_SC_MODE_CNTL_0 39
-#define EG_CONFIG__PA_SC_MODE_CNTL_1 40
-#define EG_CONFIG__VGT_REUSE_OFF 41
-#define EG_CONFIG__VGT_VTX_CNT_EN 42
-#define EG_CONFIG__VGT_SHADER_STAGES_EN 43
-#define EG_CONFIG__VGT_STRMOUT_CONFIG 44
-#define EG_CONFIG__VGT_STRMOUT_BUFFER_CONFIG 45
-#define EG_CONFIG_SIZE 46
-#define EG_CONFIG_PM4 128
-
-/* EG_CB_CNTL */
-#define EG_CB_CNTL__CB_TARGET_MASK 0
-#define EG_CB_CNTL__CB_SHADER_MASK 1
-#define EG_CB_CNTL__CB_COLOR_CONTROL 2
-#define EG_CB_CNTL__PA_SC_AA_CONFIG 3
-#define EG_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX 4
-#define EG_CB_CNTL__PA_SC_AA_MASK 5
-#define EG_CB_CNTL_SIZE 6
-#define EG_CB_CNTL_PM4 128
-
-/* EG_RASTERIZER */
-#define EG_RASTERIZER__SPI_INTERP_CONTROL_0 0
-#define EG_RASTERIZER__PA_CL_CLIP_CNTL 1
-#define EG_RASTERIZER__PA_SU_SC_MODE_CNTL 2
-#define EG_RASTERIZER__PA_CL_VS_OUT_CNTL 3
-#define EG_RASTERIZER__PA_CL_NANINF_CNTL 4
-#define EG_RASTERIZER__PA_SU_POINT_SIZE 5
-#define EG_RASTERIZER__PA_SU_POINT_MINMAX 6
-#define EG_RASTERIZER__PA_SU_LINE_CNTL 7
-#define EG_RASTERIZER__PA_SC_MPASS_PS_CNTL 8
-#define EG_RASTERIZER__PA_SC_LINE_CNTL 9
-#define EG_RASTERIZER__PA_SU_VTX_CNTL 10
-#define EG_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ 11
-#define EG_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ 12
-#define EG_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ 13
-#define EG_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ 14
-#define EG_RASTERIZER__PA_SU_POLY_OFFSET_DB_FMT_CNTL 15
-#define EG_RASTERIZER__PA_SU_POLY_OFFSET_CLAMP 16
-#define EG_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_SCALE 17
-#define EG_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET 18
-#define EG_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE 19
-#define EG_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET 20
-#define EG_RASTERIZER_SIZE 21
-#define EG_RASTERIZER_PM4 128
-
-/* EG_VIEWPORT */
-#define EG_VIEWPORT__PA_SC_VPORT_ZMIN_0 0
-#define EG_VIEWPORT__PA_SC_VPORT_ZMAX_0 1
-#define EG_VIEWPORT__PA_CL_VPORT_XSCALE_0 2
-#define EG_VIEWPORT__PA_CL_VPORT_YSCALE_0 3
-#define EG_VIEWPORT__PA_CL_VPORT_ZSCALE_0 4
-#define EG_VIEWPORT__PA_CL_VPORT_XOFFSET_0 5
-#define EG_VIEWPORT__PA_CL_VPORT_YOFFSET_0 6
-#define EG_VIEWPORT__PA_CL_VPORT_ZOFFSET_0 7
-#define EG_VIEWPORT__PA_CL_VTE_CNTL 8
-#define EG_VIEWPORT_SIZE 9
-#define EG_VIEWPORT_PM4 128
-
-/* EG_SCISSOR */
-#define EG_SCISSOR__PA_SC_SCREEN_SCISSOR_TL 0
-#define EG_SCISSOR__PA_SC_SCREEN_SCISSOR_BR 1
-#define EG_SCISSOR__PA_SC_WINDOW_OFFSET 2
-#define EG_SCISSOR__PA_SC_WINDOW_SCISSOR_TL 3
-#define EG_SCISSOR__PA_SC_WINDOW_SCISSOR_BR 4
-#define EG_SCISSOR__PA_SC_CLIPRECT_RULE 5
-#define EG_SCISSOR__PA_SC_CLIPRECT_0_TL 6
-#define EG_SCISSOR__PA_SC_CLIPRECT_0_BR 7
-#define EG_SCISSOR__PA_SC_CLIPRECT_1_TL 8
-#define EG_SCISSOR__PA_SC_CLIPRECT_1_BR 9
-#define EG_SCISSOR__PA_SC_CLIPRECT_2_TL 10
-#define EG_SCISSOR__PA_SC_CLIPRECT_2_BR 11
-#define EG_SCISSOR__PA_SC_CLIPRECT_3_TL 12
-#define EG_SCISSOR__PA_SC_CLIPRECT_3_BR 13
-#define EG_SCISSOR__PA_SC_EDGERULE 14
-#define EG_SCISSOR__PA_SC_GENERIC_SCISSOR_TL 15
-#define EG_SCISSOR__PA_SC_GENERIC_SCISSOR_BR 16
-#define EG_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL 17
-#define EG_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR 18
-#define EG_SCISSOR__PA_SU_HARDWARE_SCREEN_OFFSET 19
-#define EG_SCISSOR_SIZE 20
-#define EG_SCISSOR_PM4 128
-
-/* EG_BLEND */
-#define EG_BLEND__CB_BLEND_RED 0
-#define EG_BLEND__CB_BLEND_GREEN 1
-#define EG_BLEND__CB_BLEND_BLUE 2
-#define EG_BLEND__CB_BLEND_ALPHA 3
-#define EG_BLEND__CB_BLEND0_CONTROL 4
-#define EG_BLEND__CB_BLEND1_CONTROL 5
-#define EG_BLEND__CB_BLEND2_CONTROL 6
-#define EG_BLEND__CB_BLEND3_CONTROL 7
-#define EG_BLEND__CB_BLEND4_CONTROL 8
-#define EG_BLEND__CB_BLEND5_CONTROL 9
-#define EG_BLEND__CB_BLEND6_CONTROL 10
-#define EG_BLEND__CB_BLEND7_CONTROL 11
-#define EG_BLEND_SIZE 12
-#define EG_BLEND_PM4 128
-
-/* EG_DSA */
-#define EG_DSA__DB_STENCIL_CLEAR 0
-#define EG_DSA__DB_DEPTH_CLEAR 1
-#define EG_DSA__SX_ALPHA_TEST_CONTROL 2
-#define EG_DSA__DB_STENCILREFMASK 3
-#define EG_DSA__DB_STENCILREFMASK_BF 4
-#define EG_DSA__SX_ALPHA_REF 5
-#define EG_DSA__SPI_FOG_CNTL 6
-#define EG_DSA__DB_DEPTH_CONTROL 7
-#define EG_DSA__DB_SHADER_CONTROL 8
-#define EG_DSA__DB_RENDER_CONTROL 9
-#define EG_DSA__DB_COUNT_CONTROL 10
-#define EG_DSA__DB_RENDER_OVERRIDE 11
-#define EG_DSA__DB_RENDER_OVERRIDE2 12
-#define EG_DSA__DB_SRESULTS_COMPARE_STATE0 13
-#define EG_DSA__DB_SRESULTS_COMPARE_STATE1 14
-#define EG_DSA__DB_PRELOAD_CONTROL 15
-#define EG_DSA__DB_ALPHA_TO_MASK 16
-#define EG_DSA_SIZE 17
-#define EG_DSA_PM4 128
-
-/* EG_VS_SHADER */
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_0 0
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_1 1
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_2 2
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_3 3
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_4 4
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_5 5
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_6 6
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_7 7
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_8 8
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_9 9
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_10 10
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_11 11
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_12 12
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_13 13
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_14 14
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_15 15
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_16 16
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_17 17
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_18 18
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_19 19
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_20 20
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_21 21
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_22 22
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_23 23
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_24 24
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_25 25
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_26 26
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_27 27
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_28 28
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_29 29
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_30 30
-#define EG_VS_SHADER__SQ_VTX_SEMANTIC_31 31
-#define EG_VS_SHADER__SPI_VS_OUT_ID_0 32
-#define EG_VS_SHADER__SPI_VS_OUT_ID_1 33
-#define EG_VS_SHADER__SPI_VS_OUT_ID_2 34
-#define EG_VS_SHADER__SPI_VS_OUT_ID_3 35
-#define EG_VS_SHADER__SPI_VS_OUT_ID_4 36
-#define EG_VS_SHADER__SPI_VS_OUT_ID_5 37
-#define EG_VS_SHADER__SPI_VS_OUT_ID_6 38
-#define EG_VS_SHADER__SPI_VS_OUT_ID_7 39
-#define EG_VS_SHADER__SPI_VS_OUT_ID_8 40
-#define EG_VS_SHADER__SPI_VS_OUT_ID_9 41
-#define EG_VS_SHADER__SPI_VS_OUT_CONFIG 42
-#define EG_VS_SHADER__SQ_PGM_START_VS 43
-#define EG_VS_SHADER__SQ_PGM_RESOURCES_VS 44
-#define EG_VS_SHADER__SQ_PGM_RESOURCES_2_VS 45
-#define EG_VS_SHADER__SQ_PGM_START_FS 46
-#define EG_VS_SHADER__SQ_PGM_RESOURCES_FS 47
-#define EG_VS_SHADER_SIZE 48
-#define EG_VS_SHADER_PM4 128
-
-/* EG_PS_SHADER */
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_0 0
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_1 1
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_2 2
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_3 3
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_4 4
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_5 5
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_6 6
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_7 7
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_8 8
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_9 9
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_10 10
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_11 11
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_12 12
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_13 13
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_14 14
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_15 15
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_16 16
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_17 17
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_18 18
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_19 19
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_20 20
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_21 21
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_22 22
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_23 23
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_24 24
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_25 25
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_26 26
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_27 27
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_28 28
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_29 29
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_30 30
-#define EG_PS_SHADER__SPI_PS_INPUT_CNTL_31 31
-#define EG_PS_SHADER__SPI_THREAD_GROUPING 32
-#define EG_PS_SHADER__SPI_PS_IN_CONTROL_0 33
-#define EG_PS_SHADER__SPI_PS_IN_CONTROL_1 34
-#define EG_PS_SHADER__SPI_INPUT_Z 35
-#define EG_PS_SHADER__SPI_BARYC_CNTL 36
-#define EG_PS_SHADER__SPI_PS_IN_CONTROL_2 37
-#define EG_PS_SHADER__SPI_COMPUTE_INPUT_CNTL 38
-#define EG_PS_SHADER__SQ_PGM_START_PS 39
-#define EG_PS_SHADER__SQ_PGM_RESOURCES_PS 40
-#define EG_PS_SHADER__SQ_PGM_RESOURCES_2_PS 41
-#define EG_PS_SHADER__SQ_PGM_EXPORTS_PS 42
-#define EG_PS_SHADER_SIZE 43
-#define EG_PS_SHADER_PM4 128
-
-/* EG_UCP */
-#define EG_UCP__PA_CL_UCP0_X 0
-#define EG_UCP__PA_CL_UCP0_Y 1
-#define EG_UCP__PA_CL_UCP0_Z 2
-#define EG_UCP__PA_CL_UCP0_W 3
-#define EG_UCP__PA_CL_UCP1_X 4
-#define EG_UCP__PA_CL_UCP1_Y 5
-#define EG_UCP__PA_CL_UCP1_Z 6
-#define EG_UCP__PA_CL_UCP1_W 7
-#define EG_UCP__PA_CL_UCP2_X 8
-#define EG_UCP__PA_CL_UCP2_Y 9
-#define EG_UCP__PA_CL_UCP2_Z 10
-#define EG_UCP__PA_CL_UCP2_W 11
-#define EG_UCP__PA_CL_UCP3_X 12
-#define EG_UCP__PA_CL_UCP3_Y 13
-#define EG_UCP__PA_CL_UCP3_Z 14
-#define EG_UCP__PA_CL_UCP3_W 15
-#define EG_UCP__PA_CL_UCP4_X 16
-#define EG_UCP__PA_CL_UCP4_Y 17
-#define EG_UCP__PA_CL_UCP4_Z 18
-#define EG_UCP__PA_CL_UCP4_W 19
-#define EG_UCP__PA_CL_UCP5_X 20
-#define EG_UCP__PA_CL_UCP5_Y 21
-#define EG_UCP__PA_CL_UCP5_Z 22
-#define EG_UCP__PA_CL_UCP5_W 23
-#define EG_UCP_SIZE 24
-#define EG_UCP_PM4 128
-
-/* EG_VS_CBUF */
-#define EG_VS_CBUF__ALU_CONST_BUFFER_SIZE_VS_0 0
-#define EG_VS_CBUF__ALU_CONST_CACHE_VS_0 1
-#define EG_VS_CBUF_SIZE 2
-#define EG_VS_CBUF_PM4 128
-
-/* EG_PS_CBUF */
-#define EG_PS_CBUF__ALU_CONST_BUFFER_SIZE_PS_0 0
-#define EG_PS_CBUF__ALU_CONST_CACHE_PS_0 1
-#define EG_PS_CBUF_SIZE 2
-#define EG_PS_CBUF_PM4 128
-
-/* EG_PS_RESOURCE */
-#define EG_PS_RESOURCE__RESOURCE0_WORD0 0
-#define EG_PS_RESOURCE__RESOURCE0_WORD1 1
-#define EG_PS_RESOURCE__RESOURCE0_WORD2 2
-#define EG_PS_RESOURCE__RESOURCE0_WORD3 3
-#define EG_PS_RESOURCE__RESOURCE0_WORD4 4
-#define EG_PS_RESOURCE__RESOURCE0_WORD5 5
-#define EG_PS_RESOURCE__RESOURCE0_WORD6 6
-#define EG_PS_RESOURCE__RESOURCE0_WORD7 7
-#define EG_PS_RESOURCE_SIZE 8
-#define EG_PS_RESOURCE_PM4 128
-
-/* EG_VS_RESOURCE */
-#define EG_VS_RESOURCE__RESOURCE160_WORD0 0
-#define EG_VS_RESOURCE__RESOURCE160_WORD1 1
-#define EG_VS_RESOURCE__RESOURCE160_WORD2 2
-#define EG_VS_RESOURCE__RESOURCE160_WORD3 3
-#define EG_VS_RESOURCE__RESOURCE160_WORD4 4
-#define EG_VS_RESOURCE__RESOURCE160_WORD5 5
-#define EG_VS_RESOURCE__RESOURCE160_WORD6 6
-#define EG_VS_RESOURCE__RESOURCE160_WORD7 7
-#define EG_VS_RESOURCE_SIZE 8
-#define EG_VS_RESOURCE_PM4 128
-
-/* EG_FS_RESOURCE */
-#define EG_FS_RESOURCE__RESOURCE320_WORD0 0
-#define EG_FS_RESOURCE__RESOURCE320_WORD1 1
-#define EG_FS_RESOURCE__RESOURCE320_WORD2 2
-#define EG_FS_RESOURCE__RESOURCE320_WORD3 3
-#define EG_FS_RESOURCE__RESOURCE320_WORD4 4
-#define EG_FS_RESOURCE__RESOURCE320_WORD5 5
-#define EG_FS_RESOURCE__RESOURCE320_WORD6 6
-#define EG_FS_RESOURCE__RESOURCE320_WORD7 7
-#define EG_FS_RESOURCE_SIZE 8
-#define EG_FS_RESOURCE_PM4 128
-
-/* EG_GS_RESOURCE */
-#define EG_GS_RESOURCE__RESOURCE336_WORD0 0
-#define EG_GS_RESOURCE__RESOURCE336_WORD1 1
-#define EG_GS_RESOURCE__RESOURCE336_WORD2 2
-#define EG_GS_RESOURCE__RESOURCE336_WORD3 3
-#define EG_GS_RESOURCE__RESOURCE336_WORD4 4
-#define EG_GS_RESOURCE__RESOURCE336_WORD5 5
-#define EG_GS_RESOURCE__RESOURCE336_WORD6 6
-#define EG_GS_RESOURCE__RESOURCE336_WORD7 7
-#define EG_GS_RESOURCE_SIZE 8
-#define EG_GS_RESOURCE_PM4 128
-
-/* EG_PS_SAMPLER */
-#define EG_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0 0
-#define EG_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0 1
-#define EG_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0 2
-#define EG_PS_SAMPLER_SIZE 3
-#define EG_PS_SAMPLER_PM4 128
-
-/* EG_VS_SAMPLER */
-#define EG_VS_SAMPLER__SQ_TEX_SAMPLER_WORD0_18 0
-#define EG_VS_SAMPLER__SQ_TEX_SAMPLER_WORD1_18 1
-#define EG_VS_SAMPLER__SQ_TEX_SAMPLER_WORD2_18 2
-#define EG_VS_SAMPLER_SIZE 3
-#define EG_VS_SAMPLER_PM4 128
-
-/* EG_GS_SAMPLER */
-#define EG_GS_SAMPLER__SQ_TEX_SAMPLER_WORD0_36 0
-#define EG_GS_SAMPLER__SQ_TEX_SAMPLER_WORD1_36 1
-#define EG_GS_SAMPLER__SQ_TEX_SAMPLER_WORD2_36 2
-#define EG_GS_SAMPLER_SIZE 3
-#define EG_GS_SAMPLER_PM4 128
-
-/* EG_PS_SAMPLER_BORDER */
-#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_INDEX 0
-#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED 1
-#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN 2
-#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE 3
-#define EG_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_ALPHA 4
-#define EG_PS_SAMPLER_BORDER_SIZE 5
-#define EG_PS_SAMPLER_BORDER_PM4 128
-
-/* EG_VS_SAMPLER_BORDER */
-#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_INDEX 0
-#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_RED 1
-#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_GREEN 2
-#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_BLUE 3
-#define EG_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_ALPHA 4
-#define EG_VS_SAMPLER_BORDER_SIZE 5
-#define EG_VS_SAMPLER_BORDER_PM4 128
-
-/* EG_GS_SAMPLER_BORDER */
-#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_INDEX 0
-#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_RED 1
-#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_GREEN 2
-#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_BLUE 3
-#define EG_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_ALPHA 4
-#define EG_GS_SAMPLER_BORDER_SIZE 5
-#define EG_GS_SAMPLER_BORDER_PM4 128
-
-/* EG_CB */
-#define EG_CB__CB_COLOR0_BASE 0
-#define EG_CB__CB_COLOR0_PITCH 1
-#define EG_CB__CB_COLOR0_SLICE 2
-#define EG_CB__CB_COLOR0_VIEW 3
-#define EG_CB__CB_COLOR0_INFO 4
-#define EG_CB__CB_COLOR0_ATTRIB 5
-#define EG_CB__CB_COLOR0_DIM 6
-#define EG_CB_SIZE 7
-#define EG_CB_PM4 128
-
-/* EG_DB */
-#define EG_DB__DB_HTILE_DATA_BASE 0
-#define EG_DB__DB_Z_INFO 1
-#define EG_DB__DB_STENCIL_INFO 2
-#define EG_DB__DB_DEPTH_SIZE 3
-#define EG_DB__DB_DEPTH_SLICE 4
-#define EG_DB__DB_DEPTH_VIEW 5
-#define EG_DB__DB_HTILE_SURFACE 6
-#define EG_DB__DB_Z_READ_BASE 7
-#define EG_DB__DB_STENCIL_READ_BASE 8
-#define EG_DB__DB_Z_WRITE_BASE 9
-#define EG_DB__DB_STENCIL_WRITE_BASE 10
-#define EG_DB_SIZE 11
-#define EG_DB_PM4 128
-
-/* EG_VGT */
-#define EG_VGT__VGT_PRIMITIVE_TYPE 0
-#define EG_VGT__VGT_MAX_VTX_INDX 1
-#define EG_VGT__VGT_MIN_VTX_INDX 2
-#define EG_VGT__VGT_INDX_OFFSET 3
-#define EG_VGT__VGT_DMA_INDEX_TYPE 4
-#define EG_VGT__VGT_PRIMITIVEID_EN 5
-#define EG_VGT__VGT_DMA_NUM_INSTANCES 6
-#define EG_VGT__VGT_MULTI_PRIM_IB_RESET_EN 7
-#define EG_VGT__VGT_INSTANCE_STEP_RATE_0 8
-#define EG_VGT__VGT_INSTANCE_STEP_RATE_1 9
-#define EG_VGT_SIZE 10
-#define EG_VGT_PM4 128
-
-/* EG_DRAW */
-#define EG_DRAW__VGT_NUM_INDICES 0
-#define EG_DRAW__VGT_DMA_BASE_HI 1
-#define EG_DRAW__VGT_DMA_BASE 2
-#define EG_DRAW__VGT_DRAW_INITIATOR 3
-#define EG_DRAW_SIZE 4
-#define EG_DRAW_PM4 128
-
-/* EG_VGT_EVENT */
-#define EG_VGT_EVENT__VGT_EVENT_INITIATOR 0
-#define EG_VGT_EVENT_SIZE 1
-#define EG_VGT_EVENT_PM4 128
-
-/* EG_CB_FLUSH */
-#define EG_CB_FLUSH_SIZE 0
-#define EG_CB_FLUSH_PM4 128
-
-/* EG_DB_FLUSH */
-#define EG_DB_FLUSH_SIZE 0
-#define EG_DB_FLUSH_PM4 128
-
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 9e1a5e1f98..77432661b6 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -103,7 +103,7 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx,
}
blend->cb_target_mask = target_mask;
r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
- color_control, 0xFFFFFFFF, NULL);
+ color_control, 0xFFFFFFFD, NULL);
r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL);
for (int i = 0; i < 8; i++) {
@@ -150,10 +150,6 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx,
rstate->id = R600_PIPE_STATE_DSA;
/* depth TODO some of those db_shader_control field depend on shader adjust mask & add it to shader */
- /* db_shader_control is 0xFFFFFFBE as Z_EXPORT_ENABLE (bit 0) will be
- * set by fragment shader if it export Z and KILL_ENABLE (bit 6) will
- * be set if shader use texkill instruction
- */
db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
stencil_ref_mask = 0;
stencil_ref_mask_bf = 0;
@@ -210,7 +206,10 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx,
r600_pipe_state_add_reg(rstate, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBE, NULL);
+ /* The DB_SHADER_CONTROL mask is 0xFFFFFFBC since Z_EXPORT_ENABLE,
+ * STENCIL_EXPORT_ENABLE and KILL_ENABLE are controlled by
+ * evergreen_pipe_shader_ps().*/
+ r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL);
r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_02800C_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0, 0xFFFFFFFF, NULL);
@@ -305,11 +304,16 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx,
{
struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
union util_color uc;
+ uint32_t coord_trunc = 0;
if (rstate == NULL) {
return NULL;
}
+ if ((state->mag_img_filter == PIPE_TEX_FILTER_NEAREST) ||
+ (state->min_img_filter == PIPE_TEX_FILTER_NEAREST))
+ coord_trunc = 1;
+
rstate->id = R600_PIPE_STATE_SAMPLER;
util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0,
@@ -328,6 +332,7 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx,
0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0,
S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
+ S_03C008_MC_COORD_TRUNCATE(coord_trunc) |
S_03C008_TYPE(1),
0xFFFFFFFF, NULL);
@@ -351,7 +356,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
struct r600_resource *rbuffer;
unsigned format;
uint32_t word4 = 0, yuv_format = 0, pitch = 0;
- unsigned char swizzle[4];
+ unsigned char swizzle[4], array_mode = 0, tile_type = 0;
struct r600_bo *bo[2];
if (resource == NULL)
@@ -370,7 +375,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
swizzle[1] = state->swizzle_g;
swizzle[2] = state->swizzle_b;
swizzle[3] = state->swizzle_a;
- format = r600_translate_texformat(state->format,
+ format = r600_translate_texformat(ctx->screen, state->format,
swizzle,
&word4, &yuv_format);
if (format == ~0) {
@@ -380,36 +385,43 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
if (desc == NULL) {
R600_ERR("unknow format %d\n", state->format);
}
- tmp = (struct r600_resource_texture*)texture;
+ tmp = (struct r600_resource_texture *)texture;
+ if (tmp->depth && !tmp->is_flushing_texture) {
+ r600_texture_depth_flush(ctx, texture, TRUE);
+ tmp = tmp->flushed_depth_texture;
+ }
+
+ if (tmp->force_int_type) {
+ word4 &= C_030010_NUM_FORMAT_ALL;
+ word4 |= S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_INT);
+ }
+
rbuffer = &tmp->resource;
bo[0] = rbuffer->bo;
bo[1] = rbuffer->bo;
- /* FIXME depth texture decompression */
- if (tmp->depth) {
- r600_texture_depth_flush(ctx, texture);
- tmp = (struct r600_resource_texture*)texture;
- rbuffer = &tmp->flushed_depth_texture->resource;
- bo[0] = rbuffer->bo;
- bo[1] = rbuffer->bo;
- }
- pitch = align(tmp->pitch_in_pixels[0], 8);
+
+ pitch = align(tmp->pitch_in_blocks[0] * util_format_get_blockwidth(state->format), 8);
+ array_mode = tmp->array_mode[0];
+ tile_type = tmp->tile_type;
/* FIXME properly handle first level != 0 */
r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0,
S_030000_DIM(r600_tex_dim(texture->target)) |
S_030000_PITCH((pitch / 8) - 1) |
+ S_030000_NON_DISP_TILING_ORDER(tile_type) |
S_030000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1,
S_030004_TEX_HEIGHT(texture->height0 - 1) |
- S_030004_TEX_DEPTH(texture->depth0 - 1),
+ S_030004_TEX_DEPTH(texture->depth0 - 1) |
+ S_030004_ARRAY_MODE(array_mode),
0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2,
(tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3,
(tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]);
r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4,
- word4 | S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_NORM) |
- S_030010_SRF_MODE_ALL(V_030010_SFR_MODE_NO_ZERO) |
+ word4 |
+ S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_NO_ZERO) |
S_030010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5,
S_030014_LAST_LEVEL(state->u.tex.last_level) |
@@ -431,7 +443,8 @@ static void evergreen_set_vs_sampler_view(struct pipe_context *ctx, unsigned cou
for (int i = 0; i < count; i++) {
if (resource[i]) {
- evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i);
+ evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state,
+ i + R600_MAX_CONST_BUFFERS);
}
}
}
@@ -446,9 +459,11 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou
for (i = 0; i < count; i++) {
if (&rctx->ps_samplers.views[i]->base != views[i]) {
if (resource[i])
- evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i);
+ evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state,
+ i + R600_MAX_CONST_BUFFERS);
else
- evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+ evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL,
+ i + R600_MAX_CONST_BUFFERS);
pipe_sampler_view_reference(
(struct pipe_sampler_view **)&rctx->ps_samplers.views[i],
@@ -457,7 +472,8 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou
}
for (i = count; i < NUM_TEX_UNITS; i++) {
if (rctx->ps_samplers.views[i]) {
- evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+ evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL,
+ i + R600_MAX_CONST_BUFFERS);
pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL);
}
}
@@ -638,11 +654,19 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
unsigned color_info;
unsigned format, swap, ntype;
unsigned offset;
+ unsigned tile_type;
const struct util_format_description *desc;
struct r600_bo *bo[3];
+ int i;
surf = (struct r600_surface *)state->cbufs[cb];
rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
+
+ if (rtex->depth && !rtex->is_flushing_texture) {
+ r600_texture_depth_flush(&rctx->context, state->cbufs[cb]->texture, TRUE);
+ rtex = rtex->flushed_depth_texture;
+ }
+
rbuffer = &rtex->resource;
bo[0] = rbuffer->bo;
bo[1] = rbuffer->bo;
@@ -651,21 +675,43 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
/* XXX: fairly sure DX10+ hardware doesn't need any offset hacks */
offset = r600_texture_get_offset((struct r600_resource_texture *)state->cbufs[cb]->texture,
level, state->cbufs[cb]->u.tex.first_layer);
- pitch = rtex->pitch_in_pixels[level] / 8 - 1;
- slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
+ pitch = rtex->pitch_in_blocks[level] / 8 - 1;
+ slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1;
ntype = 0;
- desc = util_format_description(rtex->resource.base.b.format);
+ desc = util_format_description(surf->base.format);
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
ntype = V_028C70_NUMBER_SRGB;
- format = r600_translate_colorformat(rtex->resource.base.b.format);
- swap = r600_translate_colorswap(rtex->resource.base.b.format);
+ format = r600_translate_colorformat(surf->base.format);
+ swap = r600_translate_colorswap(surf->base.format);
+
+ /* disable when gallium grows int textures */
+ if ((format == FMT_32_32_32_32 || format == FMT_16_16_16_16) && rtex->force_int_type)
+ ntype = 4;
+
color_info = S_028C70_FORMAT(format) |
S_028C70_COMP_SWAP(swap) |
+ S_028C70_ARRAY_MODE(rtex->array_mode[level]) |
S_028C70_BLEND_CLAMP(1) |
S_028C70_NUMBER_TYPE(ntype);
- if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
- color_info |= S_028C70_SOURCE_FORMAT(1);
+
+ for (i = 0; i < 4; i++) {
+ if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+ break;
+ }
+ }
+
+ /* we can only select the 16bpc export format if the first non-void component is narrower
+ than 12 bits (snorm/unorm) and the format isn't float, sint or uint */
+ if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS &&
+ desc->channel[i].size < 12 && desc->channel[i].type != UTIL_FORMAT_TYPE_FLOAT &&
+ ntype != 4 && ntype != 5)
+ color_info |= S_028C70_SOURCE_FORMAT(V_028C70_EXPORT_4C_16BPC);
+
+ if (rtex->array_mode[level] > V_028C70_ARRAY_LINEAR_ALIGNED) {
+ tile_type = rtex->tile_type;
+ } else /* workaround for linear buffers */
+ tile_type = 1;
/* FIXME handle enabling of CB beyond BASE8 which has different offset */
r600_pipe_state_add_reg(rstate,
@@ -690,7 +736,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
0x00000000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate,
R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C,
- S_028C74_NON_DISP_TILING_ORDER(1),
+ S_028C74_NON_DISP_TILING_ORDER(tile_type),
0xFFFFFFFF, bo[0]);
}
@@ -711,17 +757,14 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state
surf = (struct r600_surface *)state->zsbuf;
rtex = (struct r600_resource_texture*)state->zsbuf->texture;
- rtex->tiled = 1;
- rtex->array_mode[level] = 2;
- rtex->tile_type = 1;
- rtex->depth = 1;
+
rbuffer = &rtex->resource;
/* XXX: fairly sure DX10+ hardware doesn't need any offset hacks */
offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture,
level, state->zsbuf->u.tex.first_layer);
- pitch = rtex->pitch_in_pixels[level] / 8 - 1;
- slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
+ pitch = rtex->pitch_in_blocks[level] / 8 - 1;
+ slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1;
format = r600_translate_dbformat(state->zsbuf->texture->format);
stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format);
@@ -770,8 +813,6 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
util_copy_framebuffer_state(&rctx->framebuffer, state);
- rctx->pframebuffer = &rctx->framebuffer;
-
/* build states */
for (int i = 0; i < state->nr_cbufs; i++) {
evergreen_cb(rctx, rstate, state, i);
@@ -839,48 +880,6 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
}
}
-static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
- struct pipe_resource *buffer)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_resource *rbuffer = (struct r600_resource*)buffer;
-
- /* Note that the state tracker can unbind constant buffers by
- * passing NULL here.
- */
- if (buffer == NULL) {
- return;
- }
-
- switch (shader) {
- case PIPE_SHADER_VERTEX:
- rctx->vs_const_buffer.nregs = 0;
- r600_pipe_state_add_reg(&rctx->vs_const_buffer,
- R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
- ALIGN_DIVUP(buffer->width0 >> 4, 16),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&rctx->vs_const_buffer,
- R_028980_ALU_CONST_CACHE_VS_0,
- (r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
- r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);
- break;
- case PIPE_SHADER_FRAGMENT:
- rctx->ps_const_buffer.nregs = 0;
- r600_pipe_state_add_reg(&rctx->ps_const_buffer,
- R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
- ALIGN_DIVUP(buffer->width0 >> 4, 16),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&rctx->ps_const_buffer,
- R_028940_ALU_CONST_CACHE_PS_0,
- (r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
- r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);
- break;
- default:
- R600_ERR("unsupported %d\n", shader);
- return;
- }
-}
-
void evergreen_init_state_functions(struct r600_pipe_context *rctx)
{
rctx->context.create_blend_state = evergreen_create_blend_state;
@@ -908,7 +907,7 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx)
rctx->context.delete_vs_state = r600_delete_vs_shader;
rctx->context.set_blend_color = evergreen_set_blend_color;
rctx->context.set_clip_state = evergreen_set_clip_state;
- rctx->context.set_constant_buffer = evergreen_set_constant_buffer;
+ rctx->context.set_constant_buffer = r600_set_constant_buffer;
rctx->context.set_fragment_sampler_views = evergreen_set_ps_sampler_view;
rctx->context.set_framebuffer_state = evergreen_set_framebuffer_state;
rctx->context.set_polygon_stipple = evergreen_set_polygon_stipple;
@@ -920,6 +919,7 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx)
rctx->context.set_vertex_sampler_views = evergreen_set_vs_sampler_view;
rctx->context.set_viewport_state = evergreen_set_viewport_state;
rctx->context.sampler_view_destroy = r600_sampler_view_destroy;
+ rctx->context.redefine_user_buffer = u_default_redefine_user_buffer;
}
void evergreen_init_config(struct r600_pipe_context *rctx)
@@ -1069,12 +1069,76 @@ void evergreen_init_config(struct r600_pipe_context *rctx)
num_hs_stack_entries = 42;
num_ls_stack_entries = 42;
break;
+ case CHIP_BARTS:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ num_ps_threads = 128;
+ num_vs_threads = 20;
+ num_gs_threads = 20;
+ num_es_threads = 20;
+ num_hs_threads = 20;
+ num_ls_threads = 20;
+ num_ps_stack_entries = 85;
+ num_vs_stack_entries = 85;
+ num_gs_stack_entries = 85;
+ num_es_stack_entries = 85;
+ num_hs_stack_entries = 85;
+ num_ls_stack_entries = 85;
+ break;
+ case CHIP_TURKS:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ num_ps_threads = 128;
+ num_vs_threads = 20;
+ num_gs_threads = 20;
+ num_es_threads = 20;
+ num_hs_threads = 20;
+ num_ls_threads = 20;
+ num_ps_stack_entries = 42;
+ num_vs_stack_entries = 42;
+ num_gs_stack_entries = 42;
+ num_es_stack_entries = 42;
+ num_hs_stack_entries = 42;
+ num_ls_stack_entries = 42;
+ break;
+ case CHIP_CAICOS:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ num_ps_threads = 128;
+ num_vs_threads = 10;
+ num_gs_threads = 10;
+ num_es_threads = 10;
+ num_hs_threads = 10;
+ num_ls_threads = 10;
+ num_ps_stack_entries = 42;
+ num_vs_stack_entries = 42;
+ num_gs_stack_entries = 42;
+ num_es_stack_entries = 42;
+ num_hs_stack_entries = 42;
+ num_ls_stack_entries = 42;
+ break;
}
tmp = 0x00000000;
switch (family) {
case CHIP_CEDAR:
case CHIP_PALM:
+ case CHIP_CAICOS:
break;
default:
tmp |= S_008C00_VC_ENABLE(1);
@@ -1260,226 +1324,11 @@ void evergreen_polygon_offset_update(struct r600_pipe_context *rctx)
}
}
-static void evergreen_spi_update(struct r600_pipe_context *rctx)
-{
- struct r600_pipe_shader *shader = rctx->ps_shader;
- struct r600_pipe_state rstate;
- struct r600_shader *rshader = &shader->shader;
- unsigned i, tmp;
-
- rstate.nregs = 0;
- for (i = 0; i < rshader->ninput; i++) {
- tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
- if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
- tmp |= S_028644_FLAT_SHADE(rctx->flatshade);
- }
- if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
- rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
- tmp |= S_028644_PT_SPRITE_TEX(1);
- }
- r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
- }
- r600_context_pipe_state_set(&rctx->ctx, &rstate);
-}
-
-void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx)
-{
- struct r600_pipe_state *rstate;
- struct r600_resource *rbuffer;
- struct pipe_vertex_buffer *vertex_buffer;
- unsigned i, offset;
-
- /* we don't update until we know vertex elements */
- if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer)
- return;
-
- /* delete previous translated vertex elements */
- if (rctx->tran.new_velems) {
- r600_end_vertex_translate(rctx);
- }
-
- if (rctx->vertex_elements->incompatible_layout) {
- /* translate rebind new vertex elements so
- * return once translated
- */
- r600_begin_vertex_translate(rctx);
- return;
- }
-
- if (rctx->any_user_vbs) {
- r600_upload_user_buffers(rctx);
- rctx->any_user_vbs = FALSE;
- }
-
- if (rctx->vertex_elements->vbuffer_need_offset) {
- /* one resource per vertex elements */
- rctx->nvs_resource = rctx->vertex_elements->count;
- } else {
- /* bind vertex buffer once */
- rctx->nvs_resource = rctx->nvertex_buffer;
- }
-
- for (i = 0 ; i < rctx->nvs_resource; i++) {
- rstate = &rctx->vs_resource[i];
- rstate->id = R600_PIPE_STATE_RESOURCE;
- rstate->nregs = 0;
-
- if (rctx->vertex_elements->vbuffer_need_offset) {
- /* one resource per vertex elements */
- unsigned vbuffer_index;
- vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index;
- vertex_buffer = &rctx->vertex_buffer[vbuffer_index];
- rbuffer = (struct r600_resource*)vertex_buffer->buffer;
- offset = rctx->vertex_elements->vbuffer_offset[i] +
- vertex_buffer->buffer_offset +
- r600_bo_offset(rbuffer->bo);
- } else {
- /* bind vertex buffer once */
- vertex_buffer = &rctx->vertex_buffer[i];
- rbuffer = (struct r600_resource*)vertex_buffer->buffer;
- offset = vertex_buffer->buffer_offset +
- r600_bo_offset(rbuffer->bo);
- }
-
- r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0,
- offset, 0xFFFFFFFF, rbuffer->bo);
- r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1,
- rbuffer->size - offset - 1, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2,
- S_030008_STRIDE(vertex_buffer->stride),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3,
- S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
- S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
- S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
- S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7,
- 0xC0000000, 0xFFFFFFFF, NULL);
- evergreen_fs_resource_set(&rctx->ctx, rstate, i);
- }
-}
-
-int r600_conv_pipe_prim(unsigned pprim, unsigned *prim);
-void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_resource *rbuffer;
- u32 vgt_dma_index_type, vgt_draw_initiator, mask;
- struct r600_draw rdraw;
- struct r600_pipe_state vgt;
- struct r600_drawl draw;
- unsigned prim;
-
- memset(&draw, 0, sizeof(struct r600_drawl));
- draw.ctx = ctx;
- draw.mode = info->mode;
- draw.start = info->start;
- draw.count = info->count;
- if (info->indexed && rctx->index_buffer.buffer) {
- draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size;
- draw.min_index = info->min_index;
- draw.max_index = info->max_index;
- draw.index_bias = info->index_bias;
-
- r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer,
- &rctx->index_buffer.index_size,
- &draw.start,
- info->count);
-
- draw.index_size = rctx->index_buffer.index_size;
- pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer);
- draw.index_buffer_offset = draw.start * draw.index_size;
- draw.start = 0;
- r600_upload_index_buffer(rctx, &draw);
- } else {
- draw.index_size = 0;
- draw.index_buffer = NULL;
- draw.min_index = info->min_index;
- draw.max_index = info->max_index;
- draw.index_bias = info->start;
- }
-
- switch (draw.index_size) {
- case 2:
- vgt_draw_initiator = 0;
- vgt_dma_index_type = 0;
- break;
- case 4:
- vgt_draw_initiator = 0;
- vgt_dma_index_type = 1;
- break;
- case 0:
- vgt_draw_initiator = 2;
- vgt_dma_index_type = 0;
- break;
- default:
- R600_ERR("unsupported index size %d\n", draw.index_size);
- return;
- }
- if (r600_conv_pipe_prim(draw.mode, &prim))
- return;
- if (unlikely(rctx->ps_shader == NULL)) {
- R600_ERR("missing vertex shader\n");
- return;
- }
- if (unlikely(rctx->vs_shader == NULL)) {
- R600_ERR("missing vertex shader\n");
- return;
- }
- /* there should be enough input */
- if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) {
- R600_ERR("%d resources provided, expecting %d\n",
- rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource);
- return;
- }
-
- evergreen_spi_update(rctx);
-
- mask = 0;
- for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
- mask |= (0xF << (i * 4));
- }
-
- vgt.id = R600_PIPE_STATE_VGT;
- vgt.nregs = 0;
- r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw.index_bias, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw.max_index, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.min_index, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL);
- r600_context_pipe_state_set(&rctx->ctx, &vgt);
-
- rdraw.vgt_num_indices = draw.count;
- rdraw.vgt_num_instances = 1;
- rdraw.vgt_index_type = vgt_dma_index_type;
- rdraw.vgt_draw_initiator = vgt_draw_initiator;
- rdraw.indices = NULL;
- if (draw.index_buffer) {
- rbuffer = (struct r600_resource*)draw.index_buffer;
- rdraw.indices = rbuffer->bo;
- rdraw.indices_bo_offset = draw.index_buffer_offset;
- }
- evergreen_context_draw(&rctx->ctx, &rdraw);
-
- pipe_resource_reference(&draw.index_buffer, NULL);
-}
-
void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
struct r600_pipe_state *rstate = &shader->rstate;
struct r600_shader *rshader = &shader->shader;
- unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
+ unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control;
int pos_index = -1, face_index = -1;
int ninterp = 0;
boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE;
@@ -1487,6 +1336,7 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
rstate->nregs = 0;
+ db_shader_control = 0;
for (i = 0; i < rshader->ninput; i++) {
/* evergreen NUM_INTERP only contains values interpolated into the LDS,
POSITION goes via GPRs from the SC so isn't counted */
@@ -1508,16 +1358,12 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
}
for (i = 0; i < rshader->noutput; i++) {
if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
- r600_pipe_state_add_reg(rstate,
- R_02880C_DB_SHADER_CONTROL,
- S_02880C_Z_EXPORT_ENABLE(1),
- S_02880C_Z_EXPORT_ENABLE(1), NULL);
+ db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1);
if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
- r600_pipe_state_add_reg(rstate,
- R_02880C_DB_SHADER_CONTROL,
- S_02880C_STENCIL_EXPORT_ENABLE(1),
- S_02880C_STENCIL_EXPORT_ENABLE(1), NULL);
+ db_shader_control |= S_02880C_STENCIL_EXPORT_ENABLE(1);
}
+ if (rshader->uses_kill)
+ db_shader_control |= S_02880C_KILL_ENABLE(1);
exports_ps = 0;
num_cout = 0;
@@ -1592,15 +1438,15 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
r600_pipe_state_add_reg(rstate,
R_02884C_SQ_PGM_EXPORTS_PS,
exports_ps, 0xFFFFFFFF, NULL);
-
- if (rshader->uses_kill) {
- /* only set some bits here, the other bits are set in the dsa state */
- r600_pipe_state_add_reg(rstate,
- R_02880C_DB_SHADER_CONTROL,
- S_02880C_KILL_ENABLE(1),
- S_02880C_KILL_ENABLE(1), NULL);
- }
-
+ /* FIXME: Evergreen doesn't seem to support MULTIWRITE_ENABLE. */
+ /* only set some bits here, the other bits are set in the dsa state */
+ r600_pipe_state_add_reg(rstate,
+ R_02880C_DB_SHADER_CONTROL,
+ db_shader_control,
+ S_02880C_Z_EXPORT_ENABLE(1) |
+ S_02880C_STENCIL_EXPORT_ENABLE(1) |
+ S_02880C_KILL_ENABLE(1),
+ NULL);
r600_pipe_state_add_reg(rstate,
R_03A200_SQ_LOOP_CONST_0, 0x01000FFF,
0xFFFFFFFF, NULL);
@@ -1651,6 +1497,18 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader
0xFFFFFFFF, NULL);
}
+void evergreen_fetch_shader(struct r600_vertex_element *ve)
+{
+ struct r600_pipe_state *rstate = &ve->rstate;
+ rstate->id = R600_PIPE_STATE_FETCH_SHADER;
+ rstate->nregs = 0;
+ r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS,
+ (r600_bo_offset(ve->fetch_shader)) >> 8,
+ 0xFFFFFFFF, ve->fetch_shader);
+}
+
void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx)
{
struct pipe_depth_stencil_alpha_state dsa;
@@ -1673,3 +1531,31 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx)
S_028000_COPY_CENTROID(1), NULL);
return rstate;
}
+
+void evergreen_pipe_set_buffer_resource(struct r600_pipe_context *rctx,
+ struct r600_pipe_state *rstate,
+ struct r600_resource *rbuffer,
+ unsigned offset, unsigned stride)
+{
+ r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0,
+ offset, 0xFFFFFFFF, rbuffer->bo);
+ r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1,
+ rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2,
+ S_030008_STRIDE(stride),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3,
+ S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
+ S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
+ S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
+ S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7,
+ 0xC0000000, 0xFFFFFFFF, NULL);
+}
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index e67254b256..c51a163bd0 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -108,8 +108,9 @@
#define PKT3_IT_OPCODE_S(x) (((x) & 0xFF) << 8)
#define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF)
#define PKT3_IT_OPCODE_C 0xFFFF00FF
+#define PKT3_PREDICATE(x) (((x) >> 0) & 0x1)
#define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count))
-#define PKT3(op, count) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count))
+#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PREDICATE(predicate))
/* Registers */
#define R_008C00_SQ_CONFIG 0x00008C00
@@ -327,6 +328,9 @@
#define S_028C70_SOURCE_FORMAT(x) (((x) & 0x3) << 24)
#define G_028C70_SOURCE_FORMAT(x) (((x) >> 24) & 0x3)
#define C_028C70_SOURCE_FORMAT 0xFCFFFFFF
+#define V_028C70_EXPORT_4C_32BPC 0x0
+#define V_028C70_EXPORT_4C_16BPC 0x1
+#define V_028C70_EXPORT_2C_32BPC 0x2 /* Do not use */
#define S_028C70_RAT(x) (((x) & 0x1) << 26)
#define G_028C70_RAT(x) (((x) >> 26) & 0x1)
#define C_028C70_RAT 0xFBFFFFFF
@@ -427,15 +431,6 @@
#define C_028800_STENCILZFAIL_BF 0x1FFFFFFF
#define R_028808_CB_COLOR_CONTROL 0x028808
-#define S_028808_FOG_ENABLE(x) (((x) & 0x1) << 0)
-#define G_028808_FOG_ENABLE(x) (((x) >> 0) & 0x1)
-#define C_028808_FOG_ENABLE 0xFFFFFFFE
-#define S_028808_MULTIWRITE_ENABLE(x) (((x) & 0x1) << 1)
-#define G_028808_MULTIWRITE_ENABLE(x) (((x) >> 1) & 0x1)
-#define C_028808_MULTIWRITE_ENABLE 0xFFFFFFFD
-#define S_028808_DITHER_ENABLE(x) (((x) & 0x1) << 2)
-#define G_028808_DITHER_ENABLE(x) (((x) >> 2) & 0x1)
-#define C_028808_DITHER_ENABLE 0xFFFFFFFB
#define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3)
#define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1)
#define C_028808_DEGAMMA_ENABLE 0xFFFFFFF7
@@ -939,6 +934,9 @@
#define V_030000_SQ_TEX_DIM_2D_ARRAY 0x00000005
#define V_030000_SQ_TEX_DIM_2D_MSAA 0x00000006
#define V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA 0x00000007
+#define S_030000_NON_DISP_TILING_ORDER(x) (((x) & 0x1) << 5)
+#define G_030000_NON_DISP_TILING_ORDER(x) (((x) >> 5) & 0x1)
+#define C_030000_NON_DISP_TILING_ORDER 0xFFFFFFDF
#define S_030000_PITCH(x) (((x) & 0xFFF) << 6)
#define G_030000_PITCH(x) (((x) >> 6) & 0xFFF)
#define C_030000_PITCH 0xFFFC003F
@@ -988,8 +986,8 @@
#define S_030010_SRF_MODE_ALL(x) (((x) & 0x1) << 10)
#define G_030010_SRF_MODE_ALL(x) (((x) >> 10) & 0x1)
#define C_030010_SRF_MODE_ALL 0xFFFFFBFF
-#define V_030010_SFR_MODE_ZERO_CLAMP_MINUS_ONE 0x00000000
-#define V_030010_SFR_MODE_NO_ZERO 0x00000001
+#define V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE 0x00000000
+#define V_030010_SRF_MODE_NO_ZERO 0x00000001
#define S_030010_FORCE_DEGAMMA(x) (((x) & 0x1) << 11)
#define G_030010_FORCE_DEGAMMA(x) (((x) >> 11) & 0x1)
#define C_030010_FORCE_DEGAMMA 0xFFFFF7FF
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index aa456d493f..0b7d6f7096 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -35,7 +35,7 @@
#define RADEON_CTX_MAX_PM4 (64 * 1024 / 4)
#define R600_ERR(fmt, args...) \
- fprintf(stderr, "EE %s/%s:%d - "fmt, __FILE__, __func__, __LINE__, ##args)
+ fprintf(stderr, "EE %s:%d %s - "fmt, __FILE__, __LINE__, __func__, ##args)
typedef uint64_t u64;
typedef uint32_t u32;
@@ -92,6 +92,9 @@ enum radeon_family {
CHIP_CYPRESS,
CHIP_HEMLOCK,
CHIP_PALM,
+ CHIP_BARTS,
+ CHIP_TURKS,
+ CHIP_CAICOS,
CHIP_LAST,
};
@@ -110,14 +113,17 @@ struct r600_tiling_info {
enum radeon_family r600_get_family(struct radeon *rw);
enum chip_class r600_get_family_class(struct radeon *radeon);
struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon);
+unsigned r600_get_clock_crystal_freq(struct radeon *radeon);
+unsigned r600_get_minor_version(struct radeon *radeon);
+unsigned r600_get_num_backends(struct radeon *radeon);
/* r600_bo.c */
struct r600_bo;
struct r600_bo *r600_bo(struct radeon *radeon,
- unsigned size, unsigned alignment,
- unsigned binding, unsigned usage);
+ unsigned size, unsigned alignment,
+ unsigned binding, unsigned usage);
struct r600_bo *r600_bo_handle(struct radeon *radeon,
- unsigned handle, unsigned *array_mode);
+ unsigned handle, unsigned *array_mode);
void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx);
void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo);
void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst,
@@ -245,10 +251,9 @@ struct r600_context {
u32 *pm4;
struct list_head query_list;
unsigned num_query_running;
- unsigned fence;
struct list_head fenced_bo;
- unsigned *cfence;
- struct r600_bo *fence_bo;
+ unsigned max_db; /* for OQ */
+ boolean predicate_drawing;
};
struct r600_draw {
@@ -281,13 +286,11 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query);
void r600_query_end(struct r600_context *ctx, struct r600_query *query);
void r600_context_queries_suspend(struct r600_context *ctx);
void r600_context_queries_resume(struct r600_context *ctx);
+void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation,
+ int flag_wait);
int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon);
void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw);
-void evergreen_ps_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
-void evergreen_vs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
-void evergreen_fs_resource_set(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
-
void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 1f41269534..240093f9b9 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -32,52 +32,118 @@
#include "r600_formats.h"
#include "r600d.h"
-static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu)
+#define NUM_OF_CYCLES 3
+#define NUM_OF_COMPONENTS 4
+
+static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r600_bc_alu *alu)
{
if(alu->is_op3)
return 3;
- switch (alu->inst) {
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP:
- return 0;
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE:
- return 2;
-
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
- case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
- return 1;
- default: R600_ERR(
- "Need instruction operand number for 0x%x.\n", alu->inst);
- };
+ switch (bc->chiprev) {
+ case CHIPREV_R600:
+ case CHIPREV_R700:
+ switch (alu->inst) {
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP:
+ return 0;
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE:
+ return 2;
+
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
+ return 1;
+ default: R600_ERR(
+ "Need instruction operand number for 0x%x.\n", alu->inst);
+ }
+ break;
+ case CHIPREV_EVERGREEN:
+ switch (alu->inst) {
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP:
+ return 0;
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW:
+ return 2;
+
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
+ return 1;
+ default: R600_ERR(
+ "Need instruction operand number for 0x%x.\n", alu->inst);
+ }
+ break;
+ }
return 3;
}
@@ -104,7 +170,6 @@ static struct r600_bc_alu *r600_bc_alu(void)
if (alu == NULL)
return NULL;
LIST_INITHEAD(&alu->list);
- LIST_INITHEAD(&alu->bs_list);
return alu;
}
@@ -155,6 +220,9 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family)
case CHIP_CYPRESS:
case CHIP_HEMLOCK:
case CHIP_PALM:
+ case CHIP_BARTS:
+ case CHIP_TURKS:
+ case CHIP_CAICOS:
bc->chiprev = CHIPREV_EVERGREEN;
break;
default:
@@ -184,6 +252,37 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
{
int r;
+ if (bc->cf_last && (bc->cf_last->inst == output->inst ||
+ (bc->cf_last->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT) &&
+ output->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE))) &&
+ output->type == bc->cf_last->output.type &&
+ output->elem_size == bc->cf_last->output.elem_size &&
+ output->swizzle_x == bc->cf_last->output.swizzle_x &&
+ output->swizzle_y == bc->cf_last->output.swizzle_y &&
+ output->swizzle_z == bc->cf_last->output.swizzle_z &&
+ output->swizzle_w == bc->cf_last->output.swizzle_w &&
+ (output->burst_count + bc->cf_last->output.burst_count) <= 16) {
+
+ if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr &&
+ (output->array_base + output->burst_count) == bc->cf_last->output.array_base) {
+
+ bc->cf_last->output.end_of_program |= output->end_of_program;
+ bc->cf_last->output.inst = output->inst;
+ bc->cf_last->output.gpr = output->gpr;
+ bc->cf_last->output.array_base = output->array_base;
+ bc->cf_last->output.burst_count += output->burst_count;
+ return 0;
+
+ } else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) &&
+ output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) {
+
+ bc->cf_last->output.end_of_program |= output->end_of_program;
+ bc->cf_last->output.inst = output->inst;
+ bc->cf_last->output.burst_count += output->burst_count;
+ return 0;
+ }
+ }
+
r = r600_bc_add_cf(bc);
if (r)
return r;
@@ -192,221 +291,849 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
return 0;
}
-const unsigned bank_swizzle_vec[8] = {SQ_ALU_VEC_210, //000
- SQ_ALU_VEC_120, //001
- SQ_ALU_VEC_102, //010
+/* ALU instructions that can only exist once per instruction group.
+ * Returns non-zero when alu is a non-OP3 KILL* or PRED_SET* instruction.
+ * The opcode names compared against are the R600/R700 ones or the Evergreen
+ * (EG_) ones depending on bc->chiprev; any other chiprev value is handled
+ * like Evergreen. */
+static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ switch (bc->chiprev) {
+ case CHIPREV_R600:
+ case CHIPREV_R700:
+ return !alu->is_op3 && (
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT);
+ case CHIPREV_EVERGREEN:
+ default:
+ return !alu->is_op3 && (
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT);
+ }
+}
+
+/* Returns non-zero when alu is a non-OP3 CUBE, DOT4, DOT4_IEEE or MAX4
+ * instruction. bc->chiprev selects between the R600/R700 and the Evergreen
+ * (EG_) opcode names; any other chiprev value is handled like Evergreen. */
+static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ switch (bc->chiprev) {
+ case CHIPREV_R600:
+ case CHIPREV_R700:
+ return !alu->is_op3 && (
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4);
+ case CHIPREV_EVERGREEN:
+ default:
+ return !alu->is_op3 && (
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4);
+ }
+}
- SQ_ALU_VEC_201, //011
- SQ_ALU_VEC_012, //100
- SQ_ALU_VEC_021, //101
+/* Returns non-zero when alu is a non-OP3 CUBE instruction, compared against
+ * the R600/R700 or the Evergreen (EG_) opcode according to bc->chiprev; any
+ * other chiprev value is handled like Evergreen. */
+static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ switch (bc->chiprev) {
+ case CHIPREV_R600:
+ case CHIPREV_R700:
+ return !alu->is_op3 &&
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE;
+ case CHIPREV_EVERGREEN:
+ default:
+ return !alu->is_op3 &&
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE;
+ }
+}
- SQ_ALU_VEC_012, //110
- SQ_ALU_VEC_012}; //111
+/* Returns non-zero when alu is a non-OP3 MOVA-type instruction.
+ * On R600/R700 this matches MOVA, MOVA_FLOOR and MOVA_INT; on Evergreen (and
+ * any other chiprev value) only MOVA_INT is matched. */
+static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ switch (bc->chiprev) {
+ case CHIPREV_R600:
+ case CHIPREV_R700:
+ return !alu->is_op3 && (
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
+ case CHIPREV_EVERGREEN:
+ default:
+ return !alu->is_op3 && (
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
+ }
+}
-const unsigned bank_swizzle_scl[8] = {SQ_ALU_SCL_210, //000
- SQ_ALU_SCL_122, //001
- SQ_ALU_SCL_122, //010
+/* alu instructions that can only execute on the vector unit:
+ * everything matched by is_alu_reduction_inst() or is_alu_mova_inst(), plus
+ * FLT_TO_INT_FLOOR when bc->chiprev is CHIPREV_EVERGREEN.
+ * NOTE(review): unlike the other predicates, the FLT_TO_INT_FLOOR comparison
+ * does not test alu->is_op3 first - confirm an OP3 opcode cannot alias it. */
+static int is_alu_vec_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ return is_alu_reduction_inst(bc, alu) ||
+ is_alu_mova_inst(bc, alu) ||
+ (bc->chiprev == CHIPREV_EVERGREEN &&
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR);
+}
- SQ_ALU_SCL_221, //011
- SQ_ALU_SCL_212, //100
- SQ_ALU_SCL_122, //101
+/* alu instructions that can only execute on the trans unit.
+ * For OP2 instructions (alu->is_op3 == 0) the result is non-zero when
+ * alu->inst is one of the opcodes listed below; for OP3 instructions it is
+ * non-zero for the MUL_LIT variants (MUL_LIT, _D2, _M2, _M4 on R600/R700,
+ * only MUL_LIT on Evergreen). bc->chiprev selects the opcode table; any
+ * value other than R600/R700 is handled like Evergreen. */
+static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ switch (bc->chiprev) {
+ case CHIPREV_R600:
+ case CHIPREV_R700:
+ if (!alu->is_op3)
+ return alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN ||
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE;
+ else
+ return alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT ||
+ alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_D2 ||
+ alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2 ||
+ alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4;
+ case CHIPREV_EVERGREEN:
+ default:
+ if (!alu->is_op3)
+ /* Note that FLT_TO_INT_* instructions are vector-only instructions
+ * on Evergreen, despite what the documentation says. FLT_TO_INT
+ * can do both vector and scalar. */
+ return alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN ||
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE;
+ else
+ return alu->inst == EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
+ }
+}
- SQ_ALU_SCL_122, //110
- SQ_ALU_SCL_122}; //111
+/* alu instructions that can execute on any unit: non-zero exactly when
+ * neither is_alu_vec_unit_inst() nor is_alu_trans_unit_inst() matches alu. */
+static int is_alu_any_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ return !is_alu_vec_unit_inst(bc, alu) &&
+ !is_alu_trans_unit_inst(bc, alu);
+}
-static int init_gpr(struct r600_bc_alu *alu)
+static int assign_alu_units(struct r600_bc *bc, struct r600_bc_alu *alu_first,
+ struct r600_bc_alu *assignment[5])
{
- int cycle, component;
+ struct r600_bc_alu *alu;
+ unsigned i, chan, trans;
+
+ for (i = 0; i < 5; i++)
+ assignment[i] = NULL;
+
+ for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) {
+ chan = alu->dst.chan;
+ if (is_alu_trans_unit_inst(bc, alu))
+ trans = 1;
+ else if (is_alu_vec_unit_inst(bc, alu))
+ trans = 0;
+ else if (assignment[chan])
+ trans = 1; // assume ALU_INST_PREFER_VECTOR
+ else
+ trans = 0;
+
+ if (trans) {
+ if (assignment[4]) {
+ assert(0); //ALU.Trans has already been allocated
+ return -1;
+ }
+ assignment[4] = alu;
+ } else {
+ if (assignment[chan]) {
+ assert(0); //ALU.chan has already been allocated
+ return -1;
+ }
+ assignment[chan] = alu;
+ }
+
+ if (alu->last)
+ break;
+ }
+ return 0;
+}
+
+/* Read-port reservations collected while testing one bank swizzle
+ * combination. init_bank_swizzle() sets every entry to -1, meaning free. */
+struct alu_bank_swizzle {
+ /* GPR sel reserved for each [cycle][channel], see reserve_gpr() */
+ int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS];
+ /* sel and channel of each reserved constant read, see reserve_cfile() */
+ int hw_cfile_addr[4];
+ int hw_cfile_elem[4];
+};
+
+/* Indexed as [vector bank swizzle][source operand index]: the cycle in
+ * which check_vector() reserves the GPR read for that operand. */
+static const unsigned cycle_for_bank_swizzle_vec[][3] = {
+ [SQ_ALU_VEC_012] = { 0, 1, 2 },
+ [SQ_ALU_VEC_021] = { 0, 2, 1 },
+ [SQ_ALU_VEC_120] = { 1, 2, 0 },
+ [SQ_ALU_VEC_102] = { 1, 0, 2 },
+ [SQ_ALU_VEC_201] = { 2, 0, 1 },
+ [SQ_ALU_VEC_210] = { 2, 1, 0 }
+};
+
+/* Indexed as [scalar bank swizzle][source operand index]: the cycle in
+ * which check_scalar() reserves the GPR read for that operand. */
+static const unsigned cycle_for_bank_swizzle_scl[][3] = {
+ [SQ_ALU_SCL_210] = { 2, 1, 0 },
+ [SQ_ALU_SCL_122] = { 1, 2, 2 },
+ [SQ_ALU_SCL_212] = { 2, 1, 2 },
+ [SQ_ALU_SCL_221] = { 2, 2, 1 }
+};
+
+static void init_bank_swizzle(struct alu_bank_swizzle *bs)
+{
+ int i, cycle, component;
/* set up gpr use */
for (cycle = 0; cycle < NUM_OF_CYCLES; cycle++)
for (component = 0; component < NUM_OF_COMPONENTS; component++)
- alu->hw_gpr[cycle][component] = -1;
- return 0;
+ bs->hw_gpr[cycle][component] = -1;
+ for (i = 0; i < 4; i++)
+ bs->hw_cfile_addr[i] = -1;
+ for (i = 0; i < 4; i++)
+ bs->hw_cfile_elem[i] = -1;
}
-#if 0
-static int reserve_gpr(struct r600_bc_alu *alu, unsigned sel, unsigned chan, unsigned cycle)
+static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, unsigned cycle)
{
- if (alu->hw_gpr[cycle][chan] < 0)
- alu->hw_gpr[cycle][chan] = sel;
- else if (alu->hw_gpr[cycle][chan] != (int)sel) {
- R600_ERR("Another scalar operation has already used GPR read port for channel\n");
+ if (bs->hw_gpr[cycle][chan] == -1)
+ bs->hw_gpr[cycle][chan] = sel;
+ else if (bs->hw_gpr[cycle][chan] != (int)sel) {
+ // Another scalar operation has already used GPR read port for channel
return -1;
}
return 0;
}
-static int cycle_for_scalar_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle)
+static int reserve_cfile(struct r600_bc *bc, struct alu_bank_swizzle *bs, unsigned sel, unsigned chan)
{
- int table[3];
- int ret = 0;
- switch (swiz) {
- case SQ_ALU_SCL_210:
- table[0] = 2; table[1] = 1; table[2] = 0;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_SCL_122:
- table[0] = 1; table[1] = 2; table[2] = 2;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_SCL_212:
- table[0] = 2; table[1] = 1; table[2] = 2;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_SCL_221:
- table[0] = 2; table[1] = 2; table[2] = 1;
- *p_cycle = table[sel];
- break;
- break;
- default:
- R600_ERR("bad scalar bank swizzle value\n");
- ret = -1;
- break;
+ int res, num_res = 4;
+ if (bc->chiprev >= CHIPREV_R700) {
+ num_res = 2;
+ chan /= 2;
+ }
+ for (res = 0; res < num_res; ++res) {
+ if (bs->hw_cfile_addr[res] == -1) {
+ bs->hw_cfile_addr[res] = sel;
+ bs->hw_cfile_elem[res] = chan;
+ return 0;
+ } else if (bs->hw_cfile_addr[res] == sel &&
+ bs->hw_cfile_elem[res] == chan)
+ return 0; // Read for this scalar element already reserved, nothing to do here.
}
- return ret;
+ // All cfile read ports are used, cannot reference vector element
+ return -1;
}
-static int cycle_for_vector_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle)
+static int is_gpr(unsigned sel)
{
- int table[3];
- int ret;
-
- switch (swiz) {
- case SQ_ALU_VEC_012:
- table[0] = 0; table[1] = 1; table[2] = 2;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_VEC_021:
- table[0] = 0; table[1] = 2; table[2] = 1;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_VEC_120:
- table[0] = 1; table[1] = 2; table[2] = 0;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_VEC_102:
- table[0] = 1; table[1] = 0; table[2] = 2;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_VEC_201:
- table[0] = 2; table[1] = 0; table[2] = 1;
- *p_cycle = table[sel];
- break;
- case SQ_ALU_VEC_210:
- table[0] = 2; table[1] = 1; table[2] = 0;
- *p_cycle = table[sel];
- break;
- default:
- R600_ERR("bad vector bank swizzle value\n");
- ret = -1;
- break;
- }
- return ret;
+ return (sel >= 0 && sel <= 127);
}
+/* CB constants start at 512, and get translated to a kcache index when ALU
+ * clauses are constructed. Note that we handle kcache constants the same way
+ * as (the now gone) cfile constants, is that really required?
+ *
+ * Returns non-zero when sel lies in 256..511, 512..4606 or 128..191.
+ * NOTE(review): the middle range stops at 4606 (sel < 4607) - confirm that
+ * 4607 is meant to be excluded. */
+static int is_cfile(unsigned sel)
+{
+ return (sel > 255 && sel < 512) ||
+ (sel > 511 && sel < 4607) || // Kcache before translate
+ (sel > 127 && sel < 192); // Kcache after translate
+}
+/* Returns non-zero when sel names any kind of constant source: a selector
+ * accepted by is_cfile(), or a value in the inclusive range
+ * V_SQ_ALU_SRC_0 .. V_SQ_ALU_SRC_LITERAL. */
+static int is_const(int sel)
+{
+ return is_cfile(sel) ||
+ (sel >= V_SQ_ALU_SRC_0 &&
+ sel <= V_SQ_ALU_SRC_LITERAL);
+}
-static void update_chan_counter(struct r600_bc_alu *alu, int *chan_counter)
+static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu,
+ struct alu_bank_swizzle *bs, int bank_swizzle)
{
- int num_src;
- int i;
- int channel_swizzle;
+ int r, src, num_src, sel, elem, cycle;
+
+ num_src = r600_bc_get_num_operands(bc, alu);
+ for (src = 0; src < num_src; src++) {
+ sel = alu->src[src].sel;
+ elem = alu->src[src].chan;
+ if (is_gpr(sel)) {
+ cycle = cycle_for_bank_swizzle_vec[bank_swizzle][src];
+ if (src == 1 && sel == alu->src[0].sel && elem == alu->src[0].chan)
+ // Nothing to do; special-case optimization,
+ // second source uses first source’s reservation
+ continue;
+ else {
+ r = reserve_gpr(bs, sel, elem, cycle);
+ if (r)
+ return r;
+ }
+ } else if (is_cfile(sel)) {
+ r = reserve_cfile(bc, bs, sel, elem);
+ if (r)
+ return r;
+ }
+ // No restrictions on PV, PS, literal or special constants
+ }
+ return 0;
+}
- num_src = r600_bc_get_num_operands(alu);
+/* Test whether alu can use the scalar bank swizzle bank_swizzle given the
+ * read ports already reserved in bs, reserving the ports it needs.
+ *
+ * First pass: count constant sources (is_const) and reserve a constant read
+ * port for each is_cfile source; a third constant source is rejected.
+ * Second pass: for each GPR source, look up its cycle in
+ * cycle_for_bank_swizzle_scl and reserve the GPR read port; a cycle lower
+ * than the number of constant sources is rejected.
+ *
+ * Returns 0 on success and a non-zero value on any conflict. bs may have
+ * been partially updated when a conflict is reported. */
+static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu,
+ struct alu_bank_swizzle *bs, int bank_swizzle)
+{
+ int r, src, num_src, const_count, sel, elem, cycle;
+
+ num_src = r600_bc_get_num_operands(bc, alu);
+ for (const_count = 0, src = 0; src < num_src; ++src) {
+ sel = alu->src[src].sel;
+ elem = alu->src[src].chan;
+ if (is_const(sel)) { // Any constant, including literal and inline constants
+ if (const_count >= 2)
+ // More than two references to a constant in
+ // transcendental operation.
+ return -1;
+ else
+ const_count++;
+ }
+ if (is_cfile(sel)) {
+ r = reserve_cfile(bc, bs, sel, elem);
+ if (r)
+ return r;
+ }
+ }
+ for (src = 0; src < num_src; ++src) {
+ sel = alu->src[src].sel;
+ elem = alu->src[src].chan;
+ if (is_gpr(sel)) {
+ cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src];
+ if (cycle < const_count)
+ // Cycle for GPR load conflicts with
+ // constant load in transcendental operation.
+ return -1;
+ r = reserve_gpr(bs, sel, elem, cycle);
+ if (r)
+ return r;
+ }
+ // Constants already processed
+ // No restrictions on PV, PS
+ }
+ return 0;
+}
- for (i = 0; i < num_src; i++) {
- channel_swizzle = alu->src[i].chan;
- if ((alu->src[i].sel > 0 && alu->src[i].sel < 128) && channel_swizzle <= 3)
- chan_counter[channel_swizzle]++;
+/* Choose bank swizzles for one ALU instruction group.
+ *
+ * slots[0..3] are checked with check_vector() and slots[4] with
+ * check_scalar(); NULL slots are skipped. If any slot has
+ * bank_swizzle_force set, every forced value is copied into bank_swizzle
+ * and the function returns 0 without searching. Otherwise the swizzle
+ * combinations are tried one after another and the first one that every
+ * occupied slot accepts is written to the slots.
+ *
+ * Returns 0 on success, -1 when no combination works. */
+static int check_and_set_bank_swizzle(struct r600_bc *bc,
+ struct r600_bc_alu *slots[5])
+{
+ struct alu_bank_swizzle bs;
+ int bank_swizzle[5];
+ int i, r = 0, forced = 0;
+
+ for (i = 0; i < 5; i++)
+ if (slots[i] && slots[i]->bank_swizzle_force) {
+ slots[i]->bank_swizzle = slots[i]->bank_swizzle_force;
+ forced = 1;
+ }
+
+ if (forced)
+ return 0;
+
+ // just check every possible combination of bank swizzle
+ // not very efficient, but works on the first try in most of the cases
+ for (i = 0; i < 4; i++)
+ bank_swizzle[i] = SQ_ALU_VEC_012;
+ bank_swizzle[4] = SQ_ALU_SCL_210;
+ while(bank_swizzle[4] <= SQ_ALU_SCL_221) {
+ init_bank_swizzle(&bs);
+ // Every attempt must start from a clean result. When no vector
+ // slot is occupied nothing below overwrites r, so a failure left
+ // by check_scalar() in the previous attempt would otherwise keep
+ // the scalar slot from ever being checked again.
+ r = 0;
+ for (i = 0; i < 4; i++) {
+ if (slots[i]) {
+ r = check_vector(bc, slots[i], &bs, bank_swizzle[i]);
+ if (r)
+ break;
+ }
+ }
+ if (!r && slots[4]) {
+ r = check_scalar(bc, slots[4], &bs, bank_swizzle[4]);
+ }
+ if (!r) {
+ for (i = 0; i < 5; i++) {
+ if (slots[i])
+ slots[i]->bank_swizzle = bank_swizzle[i];
+ }
+ return 0;
+ }
+
+ // advance to the next combination like an odometer: bump slot i
+ // and carry into slot i + 1 whenever it wraps around.
+ // NOTE(review): slot 4 is compared against SQ_ALU_VEC_210 here as
+ // well; its real limit is enforced by the while condition above.
+ for (i = 0; i < 5; i++) {
+ bank_swizzle[i]++;
+ if (bank_swizzle[i] <= SQ_ALU_VEC_210)
+ break;
+ else
+ bank_swizzle[i] = SQ_ALU_VEC_012;
+ }
}
+
+ // couldn't find a working swizzle
+ return -1;
}
-/* we need something like this I think - but this is bogus */
-int check_read_slots(struct r600_bc *bc, struct r600_bc_alu *alu_first)
+static int replace_gpr_with_pv_ps(struct r600_bc *bc,
+ struct r600_bc_alu *slots[5], struct r600_bc_alu *alu_prev)
{
- struct r600_bc_alu *alu;
- int chan_counter[4] = { 0 };
+ struct r600_bc_alu *prev[5];
+ int gpr[5], chan[5];
+ int i, j, r, src, num_src;
- update_chan_counter(alu_first, chan_counter);
+ r = assign_alu_units(bc, alu_prev, prev);
+ if (r)
+ return r;
- LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) {
- update_chan_counter(alu, chan_counter);
+ for (i = 0; i < 5; ++i) {
+ if(prev[i] && prev[i]->dst.write && !prev[i]->dst.rel) {
+ gpr[i] = prev[i]->dst.sel;
+ /* cube writes more than PV.X */
+ if (!is_alu_cube_inst(bc, prev[i]) && is_alu_reduction_inst(bc, prev[i]))
+ chan[i] = 0;
+ else
+ chan[i] = prev[i]->dst.chan;
+ } else
+ gpr[i] = -1;
}
- if (chan_counter[0] > 3 ||
- chan_counter[1] > 3 ||
- chan_counter[2] > 3 ||
- chan_counter[3] > 3) {
- R600_ERR("needed to split instruction for input ran out of banks %x %d %d %d %d\n",
- alu_first->inst, chan_counter[0], chan_counter[1], chan_counter[2], chan_counter[3]);
- return -1;
+ for (i = 0; i < 5; ++i) {
+ struct r600_bc_alu *alu = slots[i];
+ if(!alu)
+ continue;
+
+ num_src = r600_bc_get_num_operands(bc, alu);
+ for (src = 0; src < num_src; ++src) {
+ if (!is_gpr(alu->src[src].sel) || alu->src[src].rel)
+ continue;
+
+ if (alu->src[src].sel == gpr[4] &&
+ alu->src[src].chan == chan[4]) {
+ alu->src[src].sel = V_SQ_ALU_SRC_PS;
+ alu->src[src].chan = 0;
+ continue;
+ }
+
+ for (j = 0; j < 4; ++j) {
+ if (alu->src[src].sel == gpr[j] &&
+ alu->src[src].chan == j) {
+ alu->src[src].sel = V_SQ_ALU_SRC_PV;
+ alu->src[src].chan = chan[j];
+ break;
+ }
+ }
+ }
}
+
return 0;
}
-#endif
-static int is_const(int sel)
+void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg)
{
- if (sel > 255 && sel < 512)
- return 1;
- if (sel >= V_SQ_ALU_SRC_0 && sel <= V_SQ_ALU_SRC_LITERAL)
- return 1;
- return 0;
+ switch(value) {
+ case 0:
+ *sel = V_SQ_ALU_SRC_0;
+ break;
+ case 1:
+ *sel = V_SQ_ALU_SRC_1_INT;
+ break;
+ case -1:
+ *sel = V_SQ_ALU_SRC_M_1_INT;
+ break;
+ case 0x3F800000: // 1.0f
+ *sel = V_SQ_ALU_SRC_1;
+ break;
+ case 0x3F000000: // 0.5f
+ *sel = V_SQ_ALU_SRC_0_5;
+ break;
+ case 0xBF800000: // -1.0f
+ *sel = V_SQ_ALU_SRC_1;
+ *neg ^= 1;
+ break;
+ case 0xBF000000: // -0.5f
+ *sel = V_SQ_ALU_SRC_0_5;
+ *neg ^= 1;
+ break;
+ default:
+ *sel = V_SQ_ALU_SRC_LITERAL;
+ break;
+ }
}
-static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu)
+/* compute how many literals are needed */
+static int r600_bc_alu_nliterals(struct r600_bc *bc, struct r600_bc_alu *alu,
+ uint32_t literal[4], unsigned *nliteral)
{
- unsigned swizzle_key;
-
- if (alu->bank_swizzle_force) {
- alu->bank_swizzle = alu->bank_swizzle_force;
- return 0;
+ unsigned num_src = r600_bc_get_num_operands(bc, alu);
+ unsigned i, j;
+
+ for (i = 0; i < num_src; ++i) {
+ if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
+ uint32_t value = alu->src[i].value;
+ unsigned found = 0;
+ for (j = 0; j < *nliteral; ++j) {
+ if (literal[j] == value) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ if (*nliteral >= 4)
+ return -EINVAL;
+ literal[(*nliteral)++] = value;
+ }
+ }
}
- swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) +
- (is_const(alu->src[1].sel) ? 2 : 0 ) +
- (is_const(alu->src[2].sel) ? 1 : 0 );
-
- alu->bank_swizzle = bank_swizzle_scl[swizzle_key];
return 0;
}
-static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu)
+/* Rewrite the channel of every literal operand of `alu` to the index at
+ * which its value sits in `literal` (the first `nliteral` entries are
+ * searched). Operands whose value is not found are left unchanged.
+ */
+static void r600_bc_alu_adjust_literals(struct r600_bc *bc,
+					struct r600_bc_alu *alu,
+					uint32_t literal[4], unsigned nliteral)
+{
+	const unsigned operand_count = r600_bc_get_num_operands(bc, alu);
+	unsigned op;
+
+	for (op = 0; op < operand_count; ++op) {
+		unsigned slot = 0;
+
+		if (alu->src[op].sel != V_SQ_ALU_SRC_LITERAL)
+			continue;
+
+		while (slot < nliteral && literal[slot] != alu->src[op].value)
+			++slot;
+
+		if (slot < nliteral)
+			alu->src[op].chan = slot;
+	}
+}
+
+/* Try to fold the previous ALU instruction group into the current one.
+ *
+ * slots:    the current group, one entry per ALU unit (index 4 is the trans
+ *           unit); overwritten with the merged layout on success.
+ * alu_prev: head of the previous instruction group.
+ *
+ * Returns non-zero only when assign_alu_units() fails. Returns 0 both when
+ * the groups were merged and when merging was not possible; in the latter
+ * case nothing is modified.
+ */
+static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5],
+ struct r600_bc_alu *alu_prev)
{
- unsigned swizzle_key;
+ struct r600_bc_alu *prev[5];
+ struct r600_bc_alu *result[5] = { NULL };
+
+ /* literal: literals of the merged group; prev_literal: literals of the
+ * previous group alone (needed to undo its dword accounting). */
+ uint32_t literal[4], prev_literal[4];
+ unsigned nliteral = 0, prev_nliteral = 0;
- if (alu->bank_swizzle_force) {
- alu->bank_swizzle = alu->bank_swizzle_force;
+ int i, j, r, src, num_src;
+ int num_once_inst = 0;
+ int have_mova = 0, have_rel = 0;
+
+ /* NOTE(review): presumably distributes the previous group over the five
+ * unit slots -- confirm against assign_alu_units(). */
+ r = assign_alu_units(bc, alu_prev, prev);
+ if (r)
+ return r;
+
+ for (i = 0; i < 5; ++i) {
+ struct r600_bc_alu *alu;
+
+ /* check number of literals */
+ /* r600_bc_alu_nliterals() fails once a fifth distinct literal is
+ * needed; that means the merged group would not fit, so give up. */
+ if (prev[i]) {
+ if (r600_bc_alu_nliterals(bc, prev[i], literal, &nliteral))
+ return 0;
+ if (r600_bc_alu_nliterals(bc, prev[i], prev_literal, &prev_nliteral))
+ return 0;
+ /* a MOVA in the previous group must not be merged with a
+ * relative-addressed source in the current group */
+ if (is_alu_mova_inst(bc, prev[i])) {
+ if (have_rel)
+ return 0;
+ have_mova = 1;
+ }
+ num_once_inst += is_alu_once_inst(bc, prev[i]);
+ }
+ if (slots[i] && r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral))
+ return 0;
+
+ // let's check used slots
+ if (prev[i] && !slots[i]) {
+ result[i] = prev[i];
+ continue;
+ } else if (prev[i] && slots[i]) {
+ /* both groups want unit i: one of them has to move to the
+ * trans unit, which must be unused everywhere */
+ if (result[4] == NULL && prev[4] == NULL && slots[4] == NULL) {
+ // trans unit is still free try to use it
+ if (is_alu_any_unit_inst(bc, slots[i])) {
+ result[i] = prev[i];
+ result[4] = slots[i];
+ } else if (is_alu_any_unit_inst(bc, prev[i])) {
+ result[i] = slots[i];
+ result[4] = prev[i];
+ } else
+ return 0;
+ } else
+ return 0;
+ } else if(!slots[i]) {
+ continue;
+ } else
+ result[i] = slots[i];
+
+ // let's check source gprs
+ alu = slots[i];
+ num_once_inst += is_alu_once_inst(bc, alu);
+
+ num_src = r600_bc_get_num_operands(bc, alu);
+ for (src = 0; src < num_src; ++src) {
+ if (alu->src[src].rel) {
+ if (have_mova)
+ return 0;
+ have_rel = 1;
+ }
+
+ // constants don't matter
+ if (!is_gpr(alu->src[src].sel))
+ continue;
+
+ /* refuse to merge if the current instruction reads a register
+ * channel that the previous group writes */
+ for (j = 0; j < 5; ++j) {
+ if (!prev[j] || !prev[j]->dst.write)
+ continue;
+
+ // if it's relative then we can't determine which gpr is really used
+ if (prev[j]->dst.chan == alu->src[src].chan &&
+ (prev[j]->dst.sel == alu->src[src].sel ||
+ prev[j]->dst.rel || alu->src[src].rel))
+ return 0;
+ }
+ }
+ }
+
+ /* more than one PRED_ or KILL_ ? */
+ if (num_once_inst > 1)
return 0;
+
+ /* check if the result can still be swizzled */
+ r = check_and_set_bank_swizzle(bc, result);
+ if (r)
+ return 0;
+
+ /* looks like everything worked out right, apply the changes */
+
+ /* undo adding previous literals */
+ bc->cf_last->ndw -= align(prev_nliteral, 2);
+
+ /* sort instructions */
+ /* every merged instruction is re-linked at the tail of the clause in
+ * unit order, with its `last` flag cleared */
+ for (i = 0; i < 5; ++i) {
+ slots[i] = result[i];
+ if (result[i]) {
+ LIST_DEL(&result[i]->list);
+ result[i]->last = 0;
+ LIST_ADDTAIL(&result[i]->list, &bc->cf_last->alu);
+ }
+ }
+
+ /* determine new last instruction */
+ LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list)->last = 1;
+
+ /* determine new first instruction */
+ for (i = 0; i < 5; ++i) {
+ if (result[i]) {
+ bc->cf_last->curr_bs_head = result[i];
+ break;
+ }
}
- swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) +
- (is_const(alu->src[1].sel) ? 2 : 0 ) +
- (is_const(alu->src[2].sel) ? 1 : 0 );
- alu->bank_swizzle = bank_swizzle_vec[swizzle_key];
+ /* the previous group no longer exists on its own, so the group before
+ * it becomes the new "previous" one */
+ bc->cf_last->prev_bs_head = bc->cf_last->prev2_bs_head;
+ bc->cf_last->prev2_bs_head = NULL;
+
return 0;
}
-static int check_and_set_bank_swizzle(struct r600_bc *bc, struct r600_bc_alu *alu_first)
+/* This code handles kcache lines as single blocks of 32 constants. We could
+ * probably do slightly better by recognizing that we actually have two
+ * consecutive lines of 16 constants, but the resulting code would also be
+ * somewhat more complicated.
+ *
+ * Reserves the kcache lines needed by the sources of `alu` in the current
+ * CF (starting a new CF of the given `type` when the current one has too few
+ * free kcache sets) and rewrites those source selectors to address the
+ * kcache. Sources with sel < 512 are left alone.
+ *
+ * Returns 0 on success, -ENOMEM when the instruction touches three distinct
+ * lines, or the error returned by r600_bc_add_cf(). */
+static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *alu, int type)
{
- struct r600_bc_alu *alu = NULL;
- int num_instr = 1;
+ struct r600_bc_kcache *kcache = bc->cf_last->kcache;
+ unsigned int required_lines;
+ unsigned int free_lines = 0;
+ unsigned int cache_line[3];
+ unsigned int count = 0;
+ unsigned int i, j;
+ int r;
+
+ /* Collect required cache lines. */
+ for (i = 0; i < 3; ++i) {
+ bool found = false;
+ unsigned int line;
+
+ if (alu->src[i].sel < 512)
+ continue;
- init_gpr(alu_first);
+ /* one 32-constant block spans two line addresses, hence the * 2 */
+ line = ((alu->src[i].sel - 512) / 32) * 2;
- LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) {
- num_instr++;
+ for (j = 0; j < count; ++j) {
+ if (cache_line[j] == line) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ cache_line[count++] = line;
}
- if (num_instr == 1) {
- check_scalar(bc, alu_first);
-
- } else {
-/* check_read_slots(bc, bc->cf_last->curr_bs_head);*/
- check_vector(bc, alu_first);
- LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) {
- check_vector(bc, alu);
+ /* This should never actually happen. */
+ /* (only two kcache sets exist per CF, so three distinct lines cannot be
+ * satisfied) */
+ if (count >= 3) return -ENOMEM;
+
+ /* Count the kcache sets of the current CF that are still unused. */
+ for (i = 0; i < 2; ++i) {
+ if (kcache[i].mode == V_SQ_CF_KCACHE_NOP) {
+ ++free_lines;
+ }
+ }
+
+ /* Filter lines pulled in by previous instructions. Note that this is
+ * only for the required_lines count, we can't remove these from the
+ * cache_line array since we may have to start a new ALU clause. */
+ for (i = 0, required_lines = count; i < count; ++i) {
+ for (j = 0; j < 2; ++j) {
+ if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 &&
+ kcache[j].addr == cache_line[i]) {
+ --required_lines;
+ break;
+ }
+ }
+ }
+
+ /* Start a new ALU clause if needed. */
+ if (required_lines > free_lines) {
+ if ((r = r600_bc_add_cf(bc))) {
+ return r;
+ }
+ bc->cf_last->inst = (type << 3);
+ kcache = bc->cf_last->kcache;
+ }
+
+ /* Setup the kcache lines. */
+ for (i = 0; i < count; ++i) {
+ bool found = false;
+
+ /* already locked by an earlier instruction of this clause? */
+ for (j = 0; j < 2; ++j) {
+ if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 &&
+ kcache[j].addr == cache_line[i]) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found) continue;
+
+ /* otherwise claim the first unused kcache set */
+ for (j = 0; j < 2; ++j) {
+ if (kcache[j].mode == V_SQ_CF_KCACHE_NOP) {
+ kcache[j].bank = 0;
+ kcache[j].addr = cache_line[i];
+ kcache[j].mode = V_SQ_CF_KCACHE_LOCK_2;
+ break;
+ }
+ }
+ }
+
+ /* Alter the src operands to refer to the kcache. */
+ for (i = 0; i < 3; ++i) {
+ /* selector base per kcache set; only entries 0 and 1 are reachable
+ * because j below stays < 2 */
+ static const unsigned int base[] = {128, 160, 256, 288};
+ unsigned int line;
+
+ if (alu->src[i].sel < 512)
+ continue;
+
+ alu->src[i].sel -= 512;
+ line = (alu->src[i].sel / 32) * 2;
+
+ for (j = 0; j < 2; ++j) {
+ if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 &&
+ kcache[j].addr == line) {
+ /* keep the offset inside the 32-constant block and rebase
+ * it onto the matching kcache set */
+ alu->src[i].sel &= 0x1f;
+ alu->src[i].sel += base[j];
+ break;
+ }
}
}
+
return 0;
}
@@ -419,62 +1146,100 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
if (nalu == NULL)
return -ENOMEM;
memcpy(nalu, alu, sizeof(struct r600_bc_alu));
- nalu->nliteral = 0;
+
+ if (bc->cf_last != NULL && bc->cf_last->inst != (type << 3)) {
+ /* check if we could add it anyway */
+ if (bc->cf_last->inst == (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) &&
+ type == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE) {
+ LIST_FOR_EACH_ENTRY(lalu, &bc->cf_last->alu, list) {
+ if (lalu->predicate) {
+ bc->force_add_cf = 1;
+ break;
+ }
+ }
+ } else
+ bc->force_add_cf = 1;
+ }
/* cf can contains only alu or only vtx or only tex */
- if (bc->cf_last == NULL || bc->cf_last->inst != (type << 3) ||
- bc->force_add_cf) {
+ if (bc->cf_last == NULL || bc->force_add_cf) {
r = r600_bc_add_cf(bc);
if (r) {
free(nalu);
return r;
}
- bc->cf_last->inst = (type << 3);
}
+ bc->cf_last->inst = (type << 3);
+
+ /* Setup the kcache for this ALU instruction. This will start a new
+ * ALU clause if needed. */
+ if ((r = r600_bc_alloc_kcache_lines(bc, nalu, type))) {
+ free(nalu);
+ return r;
+ }
+
if (!bc->cf_last->curr_bs_head) {
bc->cf_last->curr_bs_head = nalu;
- LIST_INITHEAD(&nalu->bs_list);
- } else {
- LIST_ADDTAIL(&nalu->bs_list, &bc->cf_last->curr_bs_head->bs_list);
- }
- /* at most 128 slots, one add alu can add 4 slots + 4 constants(2 slots)
- * worst case */
- if (alu->last && (bc->cf_last->ndw >> 1) >= 120) {
- bc->force_add_cf = 1;
}
/* number of gpr == the last gpr used in any alu */
for (i = 0; i < 3; i++) {
- if (alu->src[i].sel >= bc->ngpr && alu->src[i].sel < 128) {
- bc->ngpr = alu->src[i].sel + 1;
- }
- /* compute how many literal are needed
- * either 2 or 4 literals
- */
- if (alu->src[i].sel == 253) {
- if (((alu->src[i].chan + 2) & 0x6) > nalu->nliteral) {
- nalu->nliteral = (alu->src[i].chan + 2) & 0x6;
- }
+ if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) {
+ bc->ngpr = nalu->src[i].sel + 1;
}
+ if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
+ r600_bc_special_constants(nalu->src[i].value,
+ &nalu->src[i].sel, &nalu->src[i].neg);
}
- if (!LIST_IS_EMPTY(&bc->cf_last->alu)) {
- lalu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list);
- if (!lalu->last && lalu->nliteral > nalu->nliteral) {
- nalu->nliteral = lalu->nliteral;
- }
- }
- if (alu->dst.sel >= bc->ngpr) {
- bc->ngpr = alu->dst.sel + 1;
+ if (nalu->dst.sel >= bc->ngpr) {
+ bc->ngpr = nalu->dst.sel + 1;
}
LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu);
/* each alu use 2 dwords */
bc->cf_last->ndw += 2;
bc->ndw += 2;
- bc->cf_last->kcache0_mode = 2;
-
/* process cur ALU instructions for bank swizzle */
- if (alu->last) {
- check_and_set_bank_swizzle(bc, bc->cf_last->curr_bs_head);
+ if (nalu->last) {
+ uint32_t literal[4];
+ unsigned nliteral;
+ struct r600_bc_alu *slots[5];
+ r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots);
+ if (r)
+ return r;
+
+ if (bc->cf_last->prev_bs_head) {
+ r = merge_inst_groups(bc, slots, bc->cf_last->prev_bs_head);
+ if (r)
+ return r;
+ }
+
+ if (bc->cf_last->prev_bs_head) {
+ r = replace_gpr_with_pv_ps(bc, slots, bc->cf_last->prev_bs_head);
+ if (r)
+ return r;
+ }
+
+ r = check_and_set_bank_swizzle(bc, slots);
+ if (r)
+ return r;
+
+ for (i = 0, nliteral = 0; i < 5; i++) {
+ if (slots[i]) {
+ r = r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral);
+ if (r)
+ return r;
+ }
+ }
+ bc->cf_last->ndw += align(nliteral, 2);
+
+ /* at most 128 slots, one add alu can add 5 slots + 4 constants(2 slots)
+ * worst case */
+ if ((bc->cf_last->ndw >> 1) >= 120) {
+ bc->force_add_cf = 1;
+ }
+
+ bc->cf_last->prev2_bs_head = bc->cf_last->prev_bs_head;
+ bc->cf_last->prev_bs_head = bc->cf_last->curr_bs_head;
bc->cf_last->curr_bs_head = NULL;
}
return 0;
@@ -485,42 +1250,22 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
return r600_bc_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
}
-int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
+/* Per-chip limit that r600_bc_add_vtx()/r600_bc_add_tex() compare against
+ * cf_last->ndw / 4 (one fetch instruction is 4 dwords) to decide when a new
+ * CF must be started. Unknown chip revisions are reported and get the
+ * R600 value.
+ */
+static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc)
{
- struct r600_bc_alu *alu;
- if (bc->cf_last == NULL) {
- return 0;
- }
- if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) {
- return 0;
- }
- /* all same on EG */
- if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_JUMP ||
- bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_ELSE ||
- bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL ||
- bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK ||
- bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE ||
- bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END ||
- bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) {
- return 0;
- }
- /* same on EG */
- if (((bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) &&
- (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3))) ||
- LIST_IS_EMPTY(&bc->cf_last->alu)) {
- R600_ERR("last CF is not ALU (%p)\n", bc->cf_last);
- return -EINVAL;
- }
- alu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list);
- if (!alu->last || !alu->nliteral || alu->literal_added) {
- return 0;
+	if (bc->chiprev == CHIPREV_R700)
+		return 16;
+
+	if (bc->chiprev == CHIPREV_EVERGREEN)
+		return 64;
+
+	/* Anything that is not plain R600 at this point is unexpected. */
+	if (bc->chiprev != CHIPREV_R600) {
+		R600_ERR("Unknown chiprev %d.\n", bc->chiprev);
}
- memcpy(alu->value, value, 4 * 4);
- bc->cf_last->ndw += alu->nliteral;
- bc->ndw += alu->nliteral;
- alu->literal_added = 1;
- return 0;
+	return 8;
}
int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
@@ -548,7 +1293,7 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
/* each fetch use 4 dwords */
bc->cf_last->ndw += 4;
bc->ndw += 4;
- if ((bc->ndw / 4) > 7)
+ if ((bc->cf_last->ndw / 4) >= r600_bc_num_tex_and_vtx_instructions(bc))
bc->force_add_cf = 1;
return 0;
}
@@ -562,6 +1307,18 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex)
return -ENOMEM;
memcpy(ntex, tex, sizeof(struct r600_bc_tex));
+ /* we can't fetch data and use it as texture lookup address in the same TEX clause */
+ if (bc->cf_last != NULL &&
+ bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) {
+ struct r600_bc_tex *ttex;
+ LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) {
+ if (ttex->dst_gpr == ntex->src_gpr) {
+ bc->force_add_cf = 1;
+ break;
+ }
+ }
+ }
+
/* cf can contains only alu or only vtx or only tex */
if (bc->cf_last == NULL ||
bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX ||
@@ -573,11 +1330,17 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex)
}
bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_TEX;
}
+ if (ntex->src_gpr >= bc->ngpr) {
+ bc->ngpr = ntex->src_gpr + 1;
+ }
+ if (ntex->dst_gpr >= bc->ngpr) {
+ bc->ngpr = ntex->dst_gpr + 1;
+ }
LIST_ADDTAIL(&ntex->list, &bc->cf_last->tex);
/* each texture fetch use 4 dwords */
bc->cf_last->ndw += 4;
bc->ndw += 4;
- if ((bc->ndw / 4) > 7)
+ if ((bc->cf_last->ndw / 4) >= r600_bc_num_tex_and_vtx_instructions(bc))
bc->force_add_cf = 1;
return 0;
}
@@ -597,31 +1360,8 @@ int r600_bc_add_cfinst(struct r600_bc *bc, int inst)
/* common to all 3 families */
static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id)
{
- unsigned fetch_resource_start = 0;
-
- /* check if we are fetch shader */
- /* fetch shader can also access vertex resource,
- * first fetch shader resource is at 160
- */
- if (bc->type == -1) {
- switch (bc->chiprev) {
- /* r600 */
- case CHIPREV_R600:
- /* r700 */
- case CHIPREV_R700:
- fetch_resource_start = 160;
- break;
- /* evergreen */
- case CHIPREV_EVERGREEN:
- fetch_resource_start = 0;
- break;
- default:
- fprintf(stderr, "%s:%s:%d unknown chiprev %d\n",
- __FILE__, __func__, __LINE__, bc->chiprev);
- break;
- }
- }
- bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id + fetch_resource_start) |
+ bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
+ S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) |
S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) |
S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x) |
S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count);
@@ -635,7 +1375,8 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign
S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) |
S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) |
S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr);
- bc->bytecode[id++] = S_SQ_VTX_WORD2_MEGA_FETCH(1);
+ bc->bytecode[id++] = S_SQ_VTX_WORD2_OFFSET(vtx->offset) |
+ S_SQ_VTX_WORD2_MEGA_FETCH(1);
bc->bytecode[id++] = 0;
return 0;
}
@@ -673,8 +1414,6 @@ static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsign
/* r600 only, r700/eg bits in r700_asm.c */
static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
{
- unsigned i;
-
/* don't replace gpr by pv or ps for destination register */
bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
@@ -705,22 +1444,23 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign
S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
+ S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) |
S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) |
S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) |
S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |
S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate);
}
- if (alu->last) {
- if (alu->nliteral && !alu->literal_added) {
- R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst);
- }
- for (i = 0; i < alu->nliteral; i++) {
- bc->bytecode[id++] = alu->value[i];
- }
- }
return 0;
}
+/* Encode the two CF dwords of a TEX/VTX clause into bytecode[0..1]:
+ * word 0 holds the clause address (cf->addr >> 1), word 1 the CF
+ * instruction, a set barrier bit and the fetch count minus one
+ * (cf->ndw / 4 - 1).
+ */
+static void r600_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf)
+{
+	bytecode[0] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
+	bytecode[1] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
+		      S_SQ_CF_WORD1_BARRIER(1) |
+		      S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
+}
+
/* common for r600/r700 - eg in eg_asm.c */
static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
{
@@ -729,15 +1469,17 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
switch (cf->inst) {
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
- S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) |
- S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) |
- S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache1_bank);
+ S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) |
+ S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) |
+ S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank);
bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) |
- S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) |
- S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) |
- S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) |
+ S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) |
+ S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) |
+ S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) |
S_SQ_CF_ALU_WORD1_BARRIER(1) |
S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == CHIPREV_R600 ? cf->r6xx_uses_waterfall : 0) |
S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
@@ -745,10 +1487,10 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
- bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
- bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
+ if (bc->chiprev == CHIPREV_R700)
+ r700_bc_cf_vtx_build(&bc->bytecode[id], cf);
+ else
+ r600_bc_cf_vtx_build(&bc->bytecode[id], cf);
break;
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
@@ -756,7 +1498,8 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
- bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
+ bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
+ S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
@@ -793,8 +1536,10 @@ int r600_bc_build(struct r600_bc *bc)
struct r600_bc_alu *alu;
struct r600_bc_vtx *vtx;
struct r600_bc_tex *tex;
+ uint32_t literal[4];
+ unsigned nliteral;
unsigned addr;
- int r;
+ int i, r;
if (bc->callstack[0].max > 0)
bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2;
@@ -808,6 +1553,8 @@ int r600_bc_build(struct r600_bc *bc)
LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
switch (cf->inst) {
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
break;
case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
@@ -854,8 +1601,16 @@ int r600_bc_build(struct r600_bc *bc)
return r;
switch (cf->inst) {
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+ nliteral = 0;
+ memset(literal, 0, sizeof(literal));
LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+ r = r600_bc_alu_nliterals(bc, alu, literal, &nliteral);
+ if (r)
+ return r;
+ r600_bc_alu_adjust_literals(bc, alu, literal, nliteral);
switch(bc->chiprev) {
case CHIPREV_R600:
r = r600_bc_alu_build(bc, alu, addr);
@@ -872,7 +1627,11 @@ int r600_bc_build(struct r600_bc *bc)
return r;
addr += 2;
if (alu->last) {
- addr += alu->nliteral;
+ for (i = 0; i < align(nliteral, 2); ++i) {
+ bc->bytecode[addr++] = literal[i];
+ }
+ nliteral = 0;
+ memset(literal, 0, sizeof(literal));
}
}
break;
@@ -953,7 +1712,14 @@ void r600_bc_clear(struct r600_bc *bc)
void r600_bc_dump(struct r600_bc *bc)
{
- unsigned i;
+ struct r600_bc_cf *cf = NULL;
+ struct r600_bc_alu *alu = NULL;
+ struct r600_bc_vtx *vtx = NULL;
+ struct r600_bc_tex *tex = NULL;
+
+ unsigned i, id;
+ uint32_t literal[4];
+ unsigned nliteral;
char chip = '6';
switch (bc->chiprev) {
@@ -968,84 +1734,191 @@ void r600_bc_dump(struct r600_bc *bc)
chip = '6';
break;
}
- fprintf(stderr, "bytecode %d dw -----------------------\n", bc->ndw);
+ fprintf(stderr, "bytecode %d dw -- %d gprs ---------------------\n", bc->ndw, bc->ngpr);
fprintf(stderr, " %c\n", chip);
- for (i = 0; i < bc->ndw; i++) {
- fprintf(stderr, "0x%08X\n", bc->bytecode[i]);
- }
- fprintf(stderr, "--------------------------------------\n");
-}
-void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
-{
- struct r600_pipe_state *rstate;
- unsigned i = 0;
-
- if (count > 8) {
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT(8 - 1);
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT(count - 8 - 1);
- } else {
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT(count - 1);
- }
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
- S_SQ_CF_WORD1_BARRIER(1);
-
- rstate = &ve->rstate;
- rstate->id = R600_PIPE_STATE_FETCH_SHADER;
- rstate->nregs = 0;
- r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS,
- r600_bo_offset(ve->fetch_shader) >> 8,
- 0xFFFFFFFF, ve->fetch_shader);
-}
+ LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
+ id = cf->id;
-void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
-{
- struct r600_pipe_state *rstate;
- unsigned i = 0;
-
- if (count > 8) {
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT(8 - 1);
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT((count - 8) - 1);
- } else {
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT(count - 1);
+ switch (cf->inst) {
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+ fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
+ fprintf(stderr, "ADDR:%d ", cf->addr);
+ fprintf(stderr, "KCACHE_MODE0:%X ", cf->kcache[0].mode);
+ fprintf(stderr, "KCACHE_BANK0:%X ", cf->kcache[0].bank);
+ fprintf(stderr, "KCACHE_BANK1:%X\n", cf->kcache[1].bank);
+ id++;
+ fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
+ fprintf(stderr, "INST:%d ", cf->inst);
+ fprintf(stderr, "KCACHE_MODE1:%X ", cf->kcache[1].mode);
+ fprintf(stderr, "KCACHE_ADDR0:%X ", cf->kcache[0].addr);
+ fprintf(stderr, "KCACHE_ADDR1:%X ", cf->kcache[1].addr);
+ fprintf(stderr, "COUNT:%d\n", cf->ndw / 2);
+ break;
+ case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
+ case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
+ case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
+ fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
+ fprintf(stderr, "ADDR:%d\n", cf->addr);
+ id++;
+ fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
+ fprintf(stderr, "INST:%d ", cf->inst);
+ fprintf(stderr, "COUNT:%d\n", cf->ndw / 4);
+ break;
+ case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
+ case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+ fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]);
+ fprintf(stderr, "GPR:%X ", cf->output.gpr);
+ fprintf(stderr, "ELEM_SIZE:%X ", cf->output.elem_size);
+ fprintf(stderr, "ARRAY_BASE:%X ", cf->output.array_base);
+ fprintf(stderr, "TYPE:%X\n", cf->output.type);
+ id++;
+ fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]);
+ fprintf(stderr, "SWIZ_X:%X ", cf->output.swizzle_x);
+ fprintf(stderr, "SWIZ_Y:%X ", cf->output.swizzle_y);
+ fprintf(stderr, "SWIZ_Z:%X ", cf->output.swizzle_z);
+ fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
+ fprintf(stderr, "BARRIER:%X ", cf->output.barrier);
+ fprintf(stderr, "INST:%d ", cf->output.inst);
+ fprintf(stderr, "BURST_COUNT:%d ", cf->output.burst_count);
+ fprintf(stderr, "EOP:%X\n", cf->output.end_of_program);
+ break;
+ case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+ case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
+ case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
+ case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
+ fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
+ fprintf(stderr, "ADDR:%d\n", cf->cf_addr);
+ id++;
+ fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
+ fprintf(stderr, "INST:%d ", cf->inst);
+ fprintf(stderr, "COND:%X ", cf->cond);
+ fprintf(stderr, "POP_COUNT:%X\n", cf->pop_count);
+ break;
+ }
+
+ id = cf->addr;
+ nliteral = 0;
+ LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+ r600_bc_alu_nliterals(bc, alu, literal, &nliteral);
+
+ fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]);
+ fprintf(stderr, "SRC0(SEL:%d ", alu->src[0].sel);
+ fprintf(stderr, "REL:%d ", alu->src[0].rel);
+ fprintf(stderr, "CHAN:%d ", alu->src[0].chan);
+ fprintf(stderr, "NEG:%d) ", alu->src[0].neg);
+ fprintf(stderr, "SRC1(SEL:%d ", alu->src[1].sel);
+ fprintf(stderr, "REL:%d ", alu->src[1].rel);
+ fprintf(stderr, "CHAN:%d ", alu->src[1].chan);
+ fprintf(stderr, "NEG:%d) ", alu->src[1].neg);
+ fprintf(stderr, "LAST:%d)\n", alu->last);
+ id++;
+ fprintf(stderr, "%04d %08X %c ", id, bc->bytecode[id], alu->last ? '*' : ' ');
+ fprintf(stderr, "INST:%d ", alu->inst);
+ fprintf(stderr, "DST(SEL:%d ", alu->dst.sel);
+ fprintf(stderr, "CHAN:%d ", alu->dst.chan);
+ fprintf(stderr, "REL:%d ", alu->dst.rel);
+ fprintf(stderr, "CLAMP:%d) ", alu->dst.clamp);
+ fprintf(stderr, "BANK_SWIZZLE:%d ", alu->bank_swizzle);
+ if (alu->is_op3) {
+ fprintf(stderr, "SRC2(SEL:%d ", alu->src[2].sel);
+ fprintf(stderr, "REL:%d ", alu->src[2].rel);
+ fprintf(stderr, "CHAN:%d ", alu->src[2].chan);
+ fprintf(stderr, "NEG:%d)\n", alu->src[2].neg);
+ } else {
+ fprintf(stderr, "SRC0_ABS:%d ", alu->src[0].abs);
+ fprintf(stderr, "SRC1_ABS:%d ", alu->src[1].abs);
+ fprintf(stderr, "WRITE_MASK:%d ", alu->dst.write);
+ fprintf(stderr, "OMOD:%d ", alu->omod);
+ fprintf(stderr, "EXECUTE_MASK:%d ", alu->predicate);
+ fprintf(stderr, "UPDATE_PRED:%d\n", alu->predicate);
+ }
+
+ id++;
+ if (alu->last) {
+ for (i = 0; i < nliteral; i++, id++) {
+ float *f = (float*)(bc->bytecode + id);
+ fprintf(stderr, "%04d %08X\t%f\n", id, bc->bytecode[id], *f);
+ }
+ id += nliteral & 1;
+ nliteral = 0;
+ }
+ }
+
+ LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
+ fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]);
+ fprintf(stderr, "INST:%d ", tex->inst);
+ fprintf(stderr, "RESOURCE_ID:%d ", tex->resource_id);
+ fprintf(stderr, "SRC(GPR:%d ", tex->src_gpr);
+ fprintf(stderr, "REL:%d)\n", tex->src_rel);
+ id++;
+ fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]);
+ fprintf(stderr, "DST(GPR:%d ", tex->dst_gpr);
+ fprintf(stderr, "REL:%d ", tex->dst_rel);
+ fprintf(stderr, "SEL_X:%d ", tex->dst_sel_x);
+ fprintf(stderr, "SEL_Y:%d ", tex->dst_sel_y);
+ fprintf(stderr, "SEL_Z:%d ", tex->dst_sel_z);
+ fprintf(stderr, "SEL_W:%d) ", tex->dst_sel_w);
+ fprintf(stderr, "LOD_BIAS:%d ", tex->lod_bias);
+ fprintf(stderr, "COORD_TYPE_X:%d ", tex->coord_type_x);
+ fprintf(stderr, "COORD_TYPE_Y:%d ", tex->coord_type_y);
+ fprintf(stderr, "COORD_TYPE_Z:%d ", tex->coord_type_z);
+ fprintf(stderr, "COORD_TYPE_W:%d\n", tex->coord_type_w);
+ id++;
+ fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]);
+ fprintf(stderr, "OFFSET_X:%d ", tex->offset_x);
+ fprintf(stderr, "OFFSET_Y:%d ", tex->offset_y);
+ fprintf(stderr, "OFFSET_Z:%d ", tex->offset_z);
+ fprintf(stderr, "SAMPLER_ID:%d ", tex->sampler_id);
+ fprintf(stderr, "SRC(SEL_X:%d ", tex->src_sel_x);
+ fprintf(stderr, "SEL_Y:%d ", tex->src_sel_y);
+ fprintf(stderr, "SEL_Z:%d ", tex->src_sel_z);
+ fprintf(stderr, "SEL_W:%d)\n", tex->src_sel_w);
+ id++;
+ fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]);
+ id++;
+ }
+
+ LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
+ fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]);
+ fprintf(stderr, "INST:%d ", vtx->inst);
+ fprintf(stderr, "FETCH_TYPE:%d ", vtx->fetch_type);
+ fprintf(stderr, "BUFFER_ID:%d\n", vtx->buffer_id);
+ id++;
+ /* This assumes that no semantic fetches exist */
+ fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]);
+ fprintf(stderr, "SRC(GPR:%d ", vtx->src_gpr);
+ fprintf(stderr, "SEL_X:%d) ", vtx->src_sel_x);
+ fprintf(stderr, "MEGA_FETCH_COUNT:%d ", vtx->mega_fetch_count);
+ fprintf(stderr, "DST(GPR:%d ", vtx->dst_gpr);
+ fprintf(stderr, "SEL_X:%d ", vtx->dst_sel_x);
+ fprintf(stderr, "SEL_Y:%d ", vtx->dst_sel_y);
+ fprintf(stderr, "SEL_Z:%d ", vtx->dst_sel_z);
+ fprintf(stderr, "SEL_W:%d) ", vtx->dst_sel_w);
+ fprintf(stderr, "USE_CONST_FIELDS:%d ", vtx->use_const_fields);
+ fprintf(stderr, "FORMAT(DATA:%d ", vtx->data_format);
+ fprintf(stderr, "NUM:%d ", vtx->num_format_all);
+ fprintf(stderr, "COMP:%d ", vtx->format_comp_all);
+ fprintf(stderr, "MODE:%d)\n", vtx->srf_mode_all);
+ id++;
+ fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]);
+ fprintf(stderr, "OFFSET:%d\n", vtx->offset);
+ //TODO
+ id++;
+ fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]);
+ id++;
+ }
}
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
- S_SQ_CF_WORD1_BARRIER(1);
-
- rstate = &ve->rstate;
- rstate->id = R600_PIPE_STATE_FETCH_SHADER;
- rstate->nregs = 0;
- r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS,
- r600_bo_offset(ve->fetch_shader) >> 8,
- 0xFFFFFFFF, ve->fetch_shader);
+
+ fprintf(stderr, "--------------------------------------\n");
}
static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
@@ -1071,7 +1944,7 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
}
switch (desc->channel[i].type) {
- /* Half-floats, floats, doubles */
+ /* Half-floats, floats, ints */
case UTIL_FORMAT_TYPE_FLOAT:
switch (desc->channel[i].size) {
case 16:
@@ -1083,8 +1956,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
*format = FMT_16_16_FLOAT;
break;
case 3:
- *format = FMT_16_16_16_FLOAT;
- break;
case 4:
*format = FMT_16_16_16_16_FLOAT;
break;
@@ -1124,8 +1995,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
*format = FMT_8_8;
break;
case 3:
- // *format = FMT_8_8_8; /* fails piglit draw-vertices test */
- // break;
case 4:
*format = FMT_8_8_8_8;
break;
@@ -1140,8 +2009,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
*format = FMT_16_16;
break;
case 3:
- // *format = FMT_16_16_16; /* fails piglit draw-vertices test */
- // break;
case 4:
*format = FMT_16_16_16_16;
break;
@@ -1184,64 +2051,21 @@ out_unknown:
R600_ERR("unsupported vertex format %s\n", util_format_name(pformat));
}
-static void r600_bc(unsigned ndw, unsigned chiprev, u32 *bytecode)
-{
- unsigned i;
- char chip = '6';
-
- switch (chiprev) {
- case 1:
- chip = '7';
- break;
- case 2:
- chip = 'E';
- break;
- case 0:
- default:
- chip = '6';
- break;
- }
- fprintf(stderr, "bytecode %d dw -----------------------\n", ndw);
- fprintf(stderr, " %c\n", chip);
- for (i = 0; i < ndw; i++) {
- fprintf(stderr, "0x%08X\n", bytecode[i]);
- }
- fprintf(stderr, "--------------------------------------\n");
-}
-
int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve)
{
- unsigned ndw, i;
- u32 *bytecode;
- unsigned fetch_resource_start = 0, format, num_format, format_comp;
+ static int dump_shaders = -1;
+
+ struct r600_bc bc;
+ struct r600_bc_vtx vtx;
struct pipe_vertex_element *elements = ve->elements;
const struct util_format_description *desc;
-
- /* 2 dwords for cf aligned to 4 + 4 dwords per input */
- ndw = 8 + ve->count * 4;
- ve->fs_size = ndw * 4;
-
- /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
- ve->fetch_shader = r600_bo(rctx->radeon, ndw*4, 256, PIPE_BIND_VERTEX_BUFFER, 0);
- if (ve->fetch_shader == NULL) {
- return -ENOMEM;
- }
-
- bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL);
- if (bytecode == NULL) {
- r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
- return -ENOMEM;
- }
-
- if (rctx->family >= CHIP_CEDAR) {
- eg_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4);
- } else {
- r600_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4);
- fetch_resource_start = 160;
- }
+ unsigned fetch_resource_start = rctx->family >= CHIP_CEDAR ? 0 : 160;
+ unsigned format, num_format, format_comp;
+ u32 *bytecode;
+ int i, r;
/* vertex elements offset need special handling, if offset is bigger
- * than what we can put in fetch instruction then we need to alterate
+ * than what we can put in fetch instruction then we need to alter
* the vertex resource offset. In such case in order to simplify code
* we will bound one resource per elements. It's a worst case scenario.
*/
@@ -1252,40 +2076,111 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
}
}
+ memset(&bc, 0, sizeof(bc));
+ r = r600_bc_init(&bc, r600_get_family(rctx->radeon));
+ if (r)
+ return r;
+
+ for (i = 0; i < ve->count; i++) {
+ if (elements[i].instance_divisor > 1) {
+ struct r600_bc_alu alu;
+
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
+ alu.src[0].sel = 0;
+ alu.src[0].chan = 3;
+
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1;
+
+ alu.dst.sel = i + 1;
+ alu.dst.chan = 3;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(&bc, &alu))) {
+ r600_bc_clear(&bc);
+ return r;
+ }
+ }
+ }
+
for (i = 0; i < ve->count; i++) {
unsigned vbuffer_index;
- r600_vertex_data_type(ve->hw_format[i], &format, &num_format, &format_comp);
- desc = util_format_description(ve->hw_format[i]);
+ r600_vertex_data_type(ve->elements[i].src_format, &format, &num_format, &format_comp);
+ desc = util_format_description(ve->elements[i].src_format);
if (desc == NULL) {
- R600_ERR("unknown format %d\n", ve->hw_format[i]);
- r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
+ r600_bc_clear(&bc);
+ R600_ERR("unknown format %d\n", ve->elements[i].src_format);
return -EINVAL;
}
/* see above for vbuffer_need_offset explanation */
vbuffer_index = elements[i].vertex_buffer_index;
- if (ve->vbuffer_need_offset) {
- bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(i + fetch_resource_start);
- } else {
- bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(vbuffer_index + fetch_resource_start);
+ memset(&vtx, 0, sizeof(vtx));
+ vtx.buffer_id = (ve->vbuffer_need_offset ? i : vbuffer_index) + fetch_resource_start;
+ vtx.fetch_type = elements[i].instance_divisor ? 1 : 0;
+ vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0;
+ vtx.src_sel_x = elements[i].instance_divisor ? 3 : 0;
+ vtx.mega_fetch_count = 0x1F;
+ vtx.dst_gpr = i + 1;
+ vtx.dst_sel_x = desc->swizzle[0];
+ vtx.dst_sel_y = desc->swizzle[1];
+ vtx.dst_sel_z = desc->swizzle[2];
+ vtx.dst_sel_w = desc->swizzle[3];
+ vtx.data_format = format;
+ vtx.num_format_all = num_format;
+ vtx.format_comp_all = format_comp;
+ vtx.srf_mode_all = 1;
+ vtx.offset = elements[i].src_offset;
+
+ if ((r = r600_bc_add_vtx(&bc, &vtx))) {
+ r600_bc_clear(&bc);
+ return r;
}
- bytecode[8 + i * 4 + 0] |= S_SQ_VTX_WORD0_SRC_GPR(0) |
- S_SQ_VTX_WORD0_SRC_SEL_X(0) |
- S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(0x1F);
- bytecode[8 + i * 4 + 1] = S_SQ_VTX_WORD1_DST_SEL_X(desc->swizzle[0]) |
- S_SQ_VTX_WORD1_DST_SEL_Y(desc->swizzle[1]) |
- S_SQ_VTX_WORD1_DST_SEL_Z(desc->swizzle[2]) |
- S_SQ_VTX_WORD1_DST_SEL_W(desc->swizzle[3]) |
- S_SQ_VTX_WORD1_USE_CONST_FIELDS(0) |
- S_SQ_VTX_WORD1_DATA_FORMAT(format) |
- S_SQ_VTX_WORD1_NUM_FORMAT_ALL(num_format) |
- S_SQ_VTX_WORD1_FORMAT_COMP_ALL(format_comp) |
- S_SQ_VTX_WORD1_SRF_MODE_ALL(1) |
- S_SQ_VTX_WORD1_GPR_DST_GPR(i + 1);
- bytecode[8 + i * 4 + 2] = S_SQ_VTX_WORD2_OFFSET(elements[i].src_offset) |
- S_SQ_VTX_WORD2_MEGA_FETCH(1);
- bytecode[8 + i * 4 + 3] = 0;
}
+
+ r600_bc_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN));
+
+ if ((r = r600_bc_build(&bc))) {
+ r600_bc_clear(&bc);
+ return r;
+ }
+
+ if (dump_shaders == -1)
+ dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
+
+ if (dump_shaders) {
+ fprintf(stderr, "--------------------------------------------------------------\n");
+ r600_bc_dump(&bc);
+ fprintf(stderr, "______________________________________________________________\n");
+ }
+
+ ve->fs_size = bc.ndw*4;
+
+ /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
+ ve->fetch_shader = r600_bo(rctx->radeon, ve->fs_size, 256, PIPE_BIND_VERTEX_BUFFER, 0);
+ if (ve->fetch_shader == NULL) {
+ r600_bc_clear(&bc);
+ return -ENOMEM;
+ }
+
+ bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL);
+ if (bytecode == NULL) {
+ r600_bc_clear(&bc);
+ r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
+ return -ENOMEM;
+ }
+
+ memcpy(bytecode, bc.bytecode, ve->fs_size);
+
r600_bo_unmap(rctx->radeon, ve->fetch_shader);
+ r600_bc_clear(&bc);
+
+ if (rctx->family >= CHIP_CEDAR)
+ evergreen_fetch_shader(ve);
+ else
+ r600_fetch_shader(ve);
+
return 0;
}
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index b147f0f5c8..27ea293ebe 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -25,9 +25,6 @@
#include "util/u_double_list.h"
-#define NUM_OF_CYCLES 3
-#define NUM_OF_COMPONENTS 4
-
struct r600_vertex_element;
struct r600_pipe_context;
@@ -37,6 +34,7 @@ struct r600_bc_alu_src {
unsigned neg;
unsigned abs;
unsigned rel;
+ uint32_t value;
};
struct r600_bc_alu_dst {
@@ -49,19 +47,15 @@ struct r600_bc_alu_dst {
struct r600_bc_alu {
struct list_head list;
- struct list_head bs_list; /* bank swizzle list */
struct r600_bc_alu_src src[3];
struct r600_bc_alu_dst dst;
unsigned inst;
unsigned last;
unsigned is_op3;
unsigned predicate;
- unsigned nliteral;
- unsigned literal_added;
unsigned bank_swizzle;
unsigned bank_swizzle_force;
- u32 value[4];
- int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS];
+ unsigned omod;
};
struct r600_bc_tex {
@@ -109,6 +103,7 @@ struct r600_bc_vtx {
unsigned num_format_all;
unsigned format_comp_all;
unsigned srf_mode_all;
+ unsigned offset;
};
struct r600_bc_output {
@@ -122,9 +117,16 @@ struct r600_bc_output {
unsigned swizzle_y;
unsigned swizzle_z;
unsigned swizzle_w;
+ unsigned burst_count;
unsigned barrier;
};
+/* One kcache slot of a CF instruction: groups the bank/mode/addr triple that
+ * struct r600_bc_cf used to carry as separate kcache0_* and kcache1_*
+ * members (it now holds an array of two of these). */
+struct r600_bc_kcache {
+ unsigned bank;
+ unsigned mode;
+ unsigned addr;
+};
+
struct r600_bc_cf {
struct list_head list;
unsigned inst;
@@ -134,18 +136,15 @@ struct r600_bc_cf {
unsigned cond;
unsigned pop_count;
unsigned cf_addr; /* control flow addr */
- unsigned kcache0_mode;
- unsigned kcache1_mode;
- unsigned kcache0_addr;
- unsigned kcache1_addr;
- unsigned kcache0_bank;
- unsigned kcache1_bank;
+ struct r600_bc_kcache kcache[2];
unsigned r6xx_uses_waterfall;
struct list_head alu;
struct list_head tex;
struct list_head vtx;
struct r600_bc_output output;
struct r600_bc_alu *curr_bs_head;
+ struct r600_bc_alu *prev_bs_head;
+ struct r600_bc_alu *prev2_bs_head;
};
#define FC_NONE 0
@@ -191,26 +190,24 @@ struct r600_bc {
/* eg_asm.c */
int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf);
-void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
/* r600_asm.c */
int r600_bc_init(struct r600_bc *bc, enum radeon_family family);
void r600_bc_clear(struct r600_bc *bc);
int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu);
-int r600_bc_add_literal(struct r600_bc *bc, const u32 *value);
int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx);
int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex);
int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output);
int r600_bc_build(struct r600_bc *bc);
int r600_bc_add_cfinst(struct r600_bc *bc, int inst);
int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type);
+void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg);
void r600_bc_dump(struct r600_bc *bc);
-void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
-void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve);
/* r700_asm.c */
+void r700_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf);
int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id);
#endif
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 0f04136fb2..04408a5cc8 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -36,6 +36,7 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ rctx->blit = true;
r600_context_queries_suspend(&rctx->ctx);
util_blitter_save_blend(rctx->blitter, rctx->states[R600_PIPE_STATE_BLEND]);
@@ -53,9 +54,9 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
if (rctx->states[R600_PIPE_STATE_CLIP]) {
util_blitter_save_clip(rctx->blitter, &rctx->clip);
}
- util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffer, rctx->vertex_buffer);
-
- rctx->vertex_elements = NULL;
+ util_blitter_save_vertex_buffers(rctx->blitter,
+ rctx->vbuf_mgr->nr_vertex_buffers,
+ rctx->vbuf_mgr->vertex_buffer);
if (op & (R600_CLEAR_SURFACE | R600_COPY))
util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer);
@@ -76,21 +77,26 @@ static void r600_blitter_end(struct pipe_context *ctx)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
r600_context_queries_resume(&rctx->ctx);
+ rctx->blit = false;
}
-int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture)
+void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct pipe_surface *zsurf, *cbsurf, surf_tmpl;
int level = 0;
float depth = 1.0f;
- surf_tmpl.format = texture->resource.base.b.format;
+
+ if (!texture->dirty_db)
+ return;
+
+ surf_tmpl.format = texture->resource.b.b.b.format;
surf_tmpl.u.tex.level = level;
surf_tmpl.u.tex.first_layer = 0;
surf_tmpl.u.tex.last_layer = 0;
surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL;
- zsurf = ctx->create_surface(ctx, &texture->resource.base.b, &surf_tmpl);
+ zsurf = ctx->create_surface(ctx, &texture->resource.b.b.b, &surf_tmpl);
surf_tmpl.format = ((struct pipe_resource*)texture->flushed_depth_texture)->format;
surf_tmpl.usage = PIPE_BIND_RENDER_TARGET;
@@ -108,8 +114,47 @@ int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_te
pipe_surface_reference(&zsurf, NULL);
pipe_surface_reference(&cbsurf, NULL);
+ texture->dirty_db = FALSE;
+}
+
+/* Call r600_blit_uncompress_depth() on every depth texture that is currently
+ * bound as a fragment-shader sampler view or as a colour buffer, skipping
+ * textures flagged is_flushing_texture. Returns immediately when rctx->blit
+ * is set. */
+void r600_flush_depth_textures(struct r600_pipe_context *rctx)
+{
+ unsigned int i;
+
+ /* rctx->blit is set by r600_blitter_begin() and cleared by r600_blitter_end() */
+ if (rctx->blit) return;
+
+ /* FIXME: This handles fragment shader textures only. */
+
+ for (i = 0; i < rctx->ps_samplers.n_views; ++i) {
+ struct r600_pipe_sampler_view *view;
+ struct r600_resource_texture *tex;
+
+ view = rctx->ps_samplers.views[i];
+ /* empty sampler slots are skipped */
+ if (!view) continue;
+
+ tex = (struct r600_resource_texture *)view->base.texture;
+ if (!tex->depth)
+ continue;
+
+ if (tex->is_flushing_texture)
+ continue;
- return 0;
+ r600_blit_uncompress_depth(&rctx->context, tex);
+ }
+
+ /* also check CB here */
+ /* NOTE(review): cbufs[i] is dereferenced without a NULL check - confirm
+ * that every slot below nr_cbufs is always populated */
+ for (i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
+ struct r600_resource_texture *tex;
+ tex = (struct r600_resource_texture *)rctx->framebuffer.cbufs[i]->texture;
+
+ if (!tex->depth)
+ continue;
+
+ if (tex->is_flushing_texture)
+ continue;
+
+ r600_blit_uncompress_depth(&rctx->context, tex);
+ }
}
static void r600_clear(struct pipe_context *ctx, unsigned buffers,
@@ -174,6 +219,52 @@ static void r600_hw_copy_region(struct pipe_context *ctx,
r600_blitter_end(ctx);
}
+/* Snapshot of a pipe_resource's format, width0 and height0, filled in by
+ * r600_compressed_to_blittable() and written back by
+ * r600_reset_blittable_to_compressed(). */
+struct texture_orig_info {
+ unsigned format;
+ unsigned width0;
+ unsigned height0;
+};
+
+/*
+ * Temporarily re-describe a block-compressed texture so that it can be
+ * blitted: the format is swapped for an uncompressed one (64-bit texels for
+ * 8-byte blocks, 128-bit texels otherwise) and width0/height0 become block
+ * counts. The previous format and dimensions are stored in *orig so that
+ * r600_reset_blittable_to_compressed() can undo the change.
+ * 'level' is accepted but not used.
+ */
+static void r600_compressed_to_blittable(struct pipe_resource *tex,
+					 unsigned level,
+					 struct texture_orig_info *orig)
+{
+	struct r600_resource_texture *rtex = (struct r600_resource_texture*)tex;
+	unsigned block_bytes = util_format_get_blocksize(tex->format);
+	int blit_format = (block_bytes == 8)
+		? PIPE_FORMAT_R16G16B16A16_UNORM   /* 64-bit block */
+		: PIPE_FORMAT_R32G32B32A32_UNORM;  /* 128-bit block */
+	int blocks_x, blocks_y;
+
+	/* remember the real description before it is overwritten */
+	orig->format = tex->format;
+	orig->width0 = tex->width0;
+	orig->height0 = tex->height0;
+
+	/* block counts are computed while the compressed format is still set */
+	blocks_x = util_format_get_nblocksx(tex->format, orig->width0);
+	blocks_y = util_format_get_nblocksy(tex->format, orig->height0);
+
+	rtex->force_int_type = true;
+	tex->format = blit_format;
+	tex->width0 = blocks_x;
+	tex->height0 = blocks_y;
+}
+
+/*
+ * Undo r600_compressed_to_blittable(): write the format and dimensions saved
+ * in *orig back into the resource and clear force_int_type.
+ * 'level' is accepted but not used.
+ */
+static void r600_reset_blittable_to_compressed(struct pipe_resource *tex,
+					       unsigned level,
+					       struct texture_orig_info *orig)
+{
+	struct r600_resource_texture *rtex = (struct r600_resource_texture*)tex;
+
+	tex->height0 = orig->height0;
+	tex->width0 = orig->width0;
+	tex->format = orig->format;
+
+	rtex->force_int_type = false;
+}
+
static void r600_resource_copy_region(struct pipe_context *ctx,
struct pipe_resource *dst,
unsigned dst_level,
@@ -182,15 +273,36 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
unsigned src_level,
const struct pipe_box *src_box)
{
- boolean is_depth;
- /* there is something wrong with depth resource copies at the moment so avoid them for now */
- is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0;
- if (is_depth)
- util_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
- src, src_level, src_box);
- else
- r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
- src, src_level, src_box);
+ struct r600_resource_texture *rsrc = (struct r600_resource_texture*)src;
+ struct texture_orig_info orig_info[2];
+ boolean restore_orig[2];
+
+ if (rsrc->depth && !rsrc->is_flushing_texture)
+ r600_texture_depth_flush(ctx, src, FALSE);
+
+ restore_orig[0] = restore_orig[1] = FALSE;
+
+ if (util_format_is_compressed(src->format)) {
+ r600_compressed_to_blittable(src, src_level, &orig_info[0]);
+ restore_orig[0] = TRUE;
+ }
+
+ if (util_format_is_compressed(dst->format)) {
+ r600_compressed_to_blittable(dst, dst_level, &orig_info[1]);
+ restore_orig[1] = TRUE;
+ /* translate the dst box as well */
+ dstx = util_format_get_nblocksx(orig_info[1].format, dstx);
+ dsty = util_format_get_nblocksy(orig_info[1].format, dsty);
+ }
+
+ r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
+ src, src_level, src_box);
+
+ if (restore_orig[0])
+ r600_reset_blittable_to_compressed(src, src_level, &orig_info[0]);
+
+ if (restore_orig[1])
+ r600_reset_blittable_to_compressed(dst, dst_level, &orig_info[1]);
}
void r600_init_blit_functions(struct r600_pipe_context *rctx)
@@ -200,3 +312,19 @@ void r600_init_blit_functions(struct r600_pipe_context *rctx)
rctx->context.clear_depth_stencil = r600_clear_depth_stencil;
rctx->context.resource_copy_region = r600_resource_copy_region;
}
+
+/*
+ * Copy level 0 of texture->flushed_depth_texture into level 0 of 'texture'
+ * with r600_hw_copy_region(), covering width0 x height0 with a depth of 1.
+ */
+void r600_blit_push_depth(struct pipe_context *ctx, struct r600_resource_texture *texture)
+{
+	struct pipe_resource *dst = (struct pipe_resource *)texture;
+	struct pipe_resource *src = (struct pipe_resource *)texture->flushed_depth_texture;
+	struct pipe_box region;
+
+	region.x = 0;
+	region.y = 0;
+	region.z = 0;
+	region.width = texture->resource.b.b.b.width0;
+	region.height = texture->resource.b.b.b.height0;
+	/* XXX that might be wrong */
+	region.depth = 1;
+
+	r600_hw_copy_region(ctx, dst, 0, 0, 0, 0, src, 0, &region);
+}
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index 03a61a3213..6ced719c8f 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -29,83 +29,50 @@
#include <util/u_math.h>
#include <util/u_inlines.h>
#include <util/u_memory.h>
-#include <util/u_upload_mgr.h>
+#include "util/u_upload_mgr.h"
+
#include "state_tracker/drm_driver.h"
+
#include <xf86drm.h>
#include "radeon_drm.h"
+
#include "r600.h"
#include "r600_pipe.h"
-extern struct u_resource_vtbl r600_buffer_vtbl;
-
-
-struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
- const struct pipe_resource *templ)
-{
- struct r600_resource_buffer *rbuffer;
- struct r600_bo *bo;
- /* XXX We probably want a different alignment for buffers and textures. */
- unsigned alignment = 4096;
-
- rbuffer = CALLOC_STRUCT(r600_resource_buffer);
- if (rbuffer == NULL)
- return NULL;
-
- rbuffer->magic = R600_BUFFER_MAGIC;
- rbuffer->user_buffer = NULL;
- rbuffer->num_ranges = 0;
- rbuffer->r.base.b = *templ;
- pipe_reference_init(&rbuffer->r.base.b.reference, 1);
- rbuffer->r.base.b.screen = screen;
- rbuffer->r.base.vtbl = &r600_buffer_vtbl;
- rbuffer->r.size = rbuffer->r.base.b.width0;
- bo = r600_bo((struct radeon*)screen->winsys, rbuffer->r.base.b.width0, alignment, rbuffer->r.base.b.bind, rbuffer->r.base.b.usage);
- if (bo == NULL) {
- FREE(rbuffer);
- return NULL;
- }
- rbuffer->r.bo = bo;
- return &rbuffer->r.base.b;
-}
-
-struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen,
- void *ptr, unsigned bytes,
- unsigned bind)
-{
- struct r600_resource_buffer *rbuffer;
-
- rbuffer = CALLOC_STRUCT(r600_resource_buffer);
- if (rbuffer == NULL)
- return NULL;
-
- rbuffer->magic = R600_BUFFER_MAGIC;
- pipe_reference_init(&rbuffer->r.base.b.reference, 1);
- rbuffer->r.base.vtbl = &r600_buffer_vtbl;
- rbuffer->r.base.b.screen = screen;
- rbuffer->r.base.b.target = PIPE_BUFFER;
- rbuffer->r.base.b.format = PIPE_FORMAT_R8_UNORM;
- rbuffer->r.base.b.usage = PIPE_USAGE_IMMUTABLE;
- rbuffer->r.base.b.bind = bind;
- rbuffer->r.base.b.width0 = bytes;
- rbuffer->r.base.b.height0 = 1;
- rbuffer->r.base.b.depth0 = 1;
- rbuffer->r.base.b.array_size = 1;
- rbuffer->r.base.b.flags = 0;
- rbuffer->num_ranges = 0;
- rbuffer->r.bo = NULL;
- rbuffer->user_buffer = ptr;
- return &rbuffer->r.base.b;
-}
-
static void r600_buffer_destroy(struct pipe_screen *screen,
struct pipe_resource *buf)
{
+ struct r600_screen *rscreen = (struct r600_screen*)screen;
struct r600_resource_buffer *rbuffer = r600_buffer(buf);
if (rbuffer->r.bo) {
r600_bo_reference((struct radeon*)screen->winsys, &rbuffer->r.bo, NULL);
}
- FREE(rbuffer);
+ rbuffer->r.bo = NULL;
+ util_slab_free(&rscreen->pool_buffers, rbuffer);
+}
+
+/*
+ * get_transfer hook for buffers: take a pipe_transfer from the context's
+ * pool_transfers slab and fill it in from the arguments.
+ *
+ * Both strides are left at zero, which is fine for buffers but would not be
+ * for 2D or higher-dimensional textures.
+ */
+static struct pipe_transfer *r600_get_transfer(struct pipe_context *ctx,
+					       struct pipe_resource *resource,
+					       unsigned level,
+					       unsigned usage,
+					       const struct pipe_box *box)
+{
+	struct r600_pipe_context *r600_ctx = (struct r600_pipe_context*)ctx;
+	struct pipe_transfer *xfer;
+
+	xfer = util_slab_alloc(&r600_ctx->pool_transfers);
+
+	/* caller-supplied description */
+	xfer->resource = resource;
+	xfer->level = level;
+	xfer->usage = usage;
+	xfer->box = *box;
+
+	/* nothing mapped yet, and buffers have no stride */
+	xfer->data = NULL;
+	xfer->stride = 0;
+	xfer->layer_stride = 0;
+
+	return xfer;
+}
static void *r600_buffer_transfer_map(struct pipe_context *pipe,
@@ -114,29 +81,10 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe,
struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource);
int write = 0;
uint8_t *data;
- int i;
- boolean flush = FALSE;
-
- if (rbuffer->user_buffer)
- return (uint8_t*)rbuffer->user_buffer + transfer->box.x;
-
- if (transfer->usage & PIPE_TRANSFER_DISCARD) {
- for (i = 0; i < rbuffer->num_ranges; i++) {
- if ((transfer->box.x >= rbuffer->ranges[i].start) &&
- (transfer->box.x < rbuffer->ranges[i].end))
- flush = TRUE;
-
- if (flush) {
- r600_bo_reference((struct radeon*)pipe->winsys, &rbuffer->r.bo, NULL);
- rbuffer->num_ranges = 0;
- rbuffer->r.bo = r600_bo((struct radeon*)pipe->winsys,
- rbuffer->r.base.b.width0, 0,
- rbuffer->r.base.b.bind,
- rbuffer->r.base.b.usage);
- break;
- }
- }
- }
+
+ if (rbuffer->r.b.user_ptr)
+ return (uint8_t*)rbuffer->r.b.user_ptr + transfer->box.x;
+
if (transfer->usage & PIPE_TRANSFER_DONTBLOCK) {
/* FIXME */
}
@@ -155,44 +103,122 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe,
{
struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource);
+ if (rbuffer->r.b.user_ptr)
+ return;
+
if (rbuffer->r.bo)
r600_bo_unmap((struct radeon*)pipe->winsys, rbuffer->r.bo);
}
static void r600_buffer_transfer_flush_region(struct pipe_context *pipe,
- struct pipe_transfer *transfer,
- const struct pipe_box *box)
+ struct pipe_transfer *transfer,
+ const struct pipe_box *box)
{
- struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource);
- unsigned i;
- unsigned offset = transfer->box.x + box->x;
- unsigned length = box->width;
+}
- assert(box->x + box->width <= transfer->box.width);
+/* transfer_destroy hook: hand the transfer back to the context's
+ * pool_transfers slab, the pool r600_get_transfer() allocates from. */
+static void r600_transfer_destroy(struct pipe_context *ctx,
+				  struct pipe_transfer *transfer)
+{
+	struct r600_pipe_context *r600_ctx = (struct r600_pipe_context*)ctx;
+
+	util_slab_free(&r600_ctx->pool_transfers, transfer);
+}
- if (rbuffer->user_buffer)
- return;
+/*
+ * transfer_inline_write hook for buffers: map the buffer's bo for a
+ * discarding write, copy box->width bytes from 'data' to offset box->x and
+ * unmap again. Must not be called on a user-pointer buffer (asserted).
+ * 'level', 'stride' and 'layer_stride' are not used.
+ * If the mapping fails nothing is written.
+ */
+static void r600_buffer_transfer_inline_write(struct pipe_context *pipe,
+					      struct pipe_resource *resource,
+					      unsigned level,
+					      unsigned usage,
+					      const struct pipe_box *box,
+					      const void *data,
+					      unsigned stride,
+					      unsigned layer_stride)
+{
+	struct radeon *ws = (struct radeon*)pipe->winsys;
+	struct r600_resource_buffer *rbuffer = r600_buffer(resource);
+	uint8_t *map = NULL;
- /* mark the range as used */
- for(i = 0; i < rbuffer->num_ranges; ++i) {
- if(offset <= rbuffer->ranges[i].end && rbuffer->ranges[i].start <= (offset+box->width)) {
- rbuffer->ranges[i].start = MIN2(rbuffer->ranges[i].start, offset);
- rbuffer->ranges[i].end = MAX2(rbuffer->ranges[i].end, (offset+length));
- return;
- }
- }
+	assert(rbuffer->r.b.user_ptr == NULL);
+
+	map = r600_bo_map(ws, rbuffer->r.bo,
+			  PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage,
+			  pipe);
+	/* r600_bo_map() can fail (other callers check for NULL); do not
+	 * memcpy through a NULL mapping */
+	if (map == NULL)
+		return;
+
+	memcpy(map + box->x, data, box->width);
- rbuffer->ranges[rbuffer->num_ranges].start = offset;
- rbuffer->ranges[rbuffer->num_ranges].end = offset+length;
- rbuffer->num_ranges++;
+	if (rbuffer->r.bo)
+		r600_bo_unmap(ws, rbuffer->r.bo);
}
-unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context,
- struct pipe_resource *buf,
- unsigned level, int layer)
+static const struct u_resource_vtbl r600_buffer_vtbl =
{
- /* FIXME */
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+ u_default_resource_get_handle, /* get_handle */
+ r600_buffer_destroy, /* resource_destroy */
+ r600_get_transfer, /* get_transfer */
+ r600_transfer_destroy, /* transfer_destroy */
+ r600_buffer_transfer_map, /* transfer_map */
+ r600_buffer_transfer_flush_region, /* transfer_flush_region */
+ r600_buffer_transfer_unmap, /* transfer_unmap */
+ r600_buffer_transfer_inline_write /* transfer_inline_write */
+};
+
+/*
+ * Create a buffer resource from 'templ': the wrapper struct comes from the
+ * screen's pool_buffers slab and is backed by a freshly allocated r600_bo of
+ * templ->width0 bytes.
+ *
+ * Returns the embedded pipe_resource, or NULL if the bo could not be
+ * allocated (the wrapper is returned to the slab in that case).
+ */
+struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
+					 const struct pipe_resource *templ)
+{
+	struct r600_screen *rscreen = (struct r600_screen*)screen;
+	struct r600_resource_buffer *rbuffer;
+	struct r600_bo *bo;
+	/* XXX We probably want a different alignment for buffers and textures. */
+	unsigned alignment = 4096;
+
+	rbuffer = util_slab_alloc(&rscreen->pool_buffers);
+
+	rbuffer->magic = R600_BUFFER_MAGIC;
+	rbuffer->r.b.b.b = *templ;
+	pipe_reference_init(&rbuffer->r.b.b.b.reference, 1);
+	rbuffer->r.b.b.b.screen = screen;
+	rbuffer->r.b.b.vtbl = &r600_buffer_vtbl;
+	rbuffer->r.b.user_ptr = NULL;
+	rbuffer->r.size = rbuffer->r.b.b.b.width0;
+	rbuffer->r.bo_size = rbuffer->r.size;
+
+	bo = r600_bo((struct radeon*)screen->winsys,
+		     rbuffer->r.b.b.b.width0,
+		     alignment, rbuffer->r.b.b.b.bind,
+		     rbuffer->r.b.b.b.usage);
+
+	if (bo == NULL) {
+		/* rbuffer came from the slab pool, so it has to go back there
+		 * (as r600_buffer_destroy does) and not through FREE() */
+		util_slab_free(&rscreen->pool_buffers, rbuffer);
+		return NULL;
+	}
+	rbuffer->r.bo = bo;
+	return &rbuffer->r.b.b.b;
+}
+
+/*
+ * Wrap 'bytes' bytes of caller-owned memory at 'ptr' as a PIPE_BUFFER
+ * resource. The wrapper comes from the screen's pool_buffers slab; no bo is
+ * attached (bo is NULL, bo_size is 0) and the pointer is kept in user_ptr.
+ */
+struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen,
+					      void *ptr, unsigned bytes,
+					      unsigned bind)
+{
+	struct r600_screen *rscreen = (struct r600_screen*)screen;
+	struct r600_resource_buffer *rbuffer = util_slab_alloc(&rscreen->pool_buffers);
+	struct pipe_resource *res = &rbuffer->r.b.b.b;
+
+	rbuffer->magic = R600_BUFFER_MAGIC;
+	pipe_reference_init(&res->reference, 1);
+	rbuffer->r.b.b.vtbl = &r600_buffer_vtbl;
+
+	/* generic resource description: a 1D run of 'bytes' bytes */
+	res->screen = screen;
+	res->target = PIPE_BUFFER;
+	res->format = PIPE_FORMAT_R8_UNORM;
+	res->usage = PIPE_USAGE_IMMUTABLE;
+	res->bind = bind;
+	res->width0 = bytes;
+	res->height0 = 1;
+	res->depth0 = 1;
+	res->array_size = 1;
+	res->flags = 0;
+
+	/* backing storage is the user pointer, not a bo */
+	rbuffer->r.b.user_ptr = ptr;
+	rbuffer->r.bo = NULL;
+	rbuffer->r.bo_size = 0;
+
+	return res;
+}
struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen,
@@ -213,82 +239,39 @@ struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen,
return NULL;
}
- pipe_reference_init(&rbuffer->base.b.reference, 1);
- rbuffer->base.b.target = PIPE_BUFFER;
- rbuffer->base.b.screen = screen;
- rbuffer->base.vtbl = &r600_buffer_vtbl;
+ pipe_reference_init(&rbuffer->b.b.b.reference, 1);
+ rbuffer->b.b.b.target = PIPE_BUFFER;
+ rbuffer->b.b.b.screen = screen;
+ rbuffer->b.b.vtbl = &r600_buffer_vtbl;
rbuffer->bo = bo;
- return &rbuffer->base.b;
+ return &rbuffer->b.b.b;
}
-struct u_resource_vtbl r600_buffer_vtbl =
+void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw)
{
- u_default_resource_get_handle, /* get_handle */
- r600_buffer_destroy, /* resource_destroy */
- r600_buffer_is_referenced_by_cs, /* is_buffer_referenced */
- u_default_get_transfer, /* get_transfer */
- u_default_transfer_destroy, /* transfer_destroy */
- r600_buffer_transfer_map, /* transfer_map */
- r600_buffer_transfer_flush_region, /* transfer_flush_region */
- r600_buffer_transfer_unmap, /* transfer_unmap */
- u_default_transfer_inline_write /* transfer_inline_write */
-};
+ struct r600_resource_buffer *rbuffer = r600_buffer(draw->index_buffer);
+ boolean flushed;
+
+ u_upload_data(rctx->vbuf_mgr->uploader, 0,
+ draw->info.count * draw->index_size,
+ rbuffer->r.b.user_ptr,
+ &draw->index_buffer_offset,
+ &draw->index_buffer, &flushed);
+}
-int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw)
+void r600_upload_const_buffer(struct r600_pipe_context *rctx, struct r600_resource_buffer **rbuffer,
+ uint32_t *const_offset)
{
- struct pipe_resource *upload_buffer = NULL;
- unsigned index_offset = draw->index_buffer_offset;
- int ret = 0;
-
- if (r600_buffer_is_user_buffer(draw->index_buffer)) {
- ret = u_upload_buffer(rctx->upload_ib,
- index_offset,
- draw->count * draw->index_size,
- draw->index_buffer,
- &index_offset,
- &upload_buffer);
- if (ret) {
- goto done;
- }
- draw->index_buffer_offset = index_offset;
-
- /* Transfer ownership. */
- pipe_resource_reference(&draw->index_buffer, upload_buffer);
- pipe_resource_reference(&upload_buffer, NULL);
- }
+ if ((*rbuffer)->r.b.user_ptr) {
+ uint8_t *ptr = (*rbuffer)->r.b.user_ptr;
+ unsigned size = (*rbuffer)->r.b.b.b.width0;
+ boolean flushed;
-done:
- return ret;
-}
+ *rbuffer = NULL;
-int r600_upload_user_buffers(struct r600_pipe_context *rctx)
-{
- enum pipe_error ret = PIPE_OK;
- int i, nr;
-
- nr = rctx->vertex_elements->count;
- nr = rctx->nvertex_buffer;
-
- for (i = 0; i < nr; i++) {
-// struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[rctx->vertex_elements->elements[i].vertex_buffer_index];
- struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i];
-
- if (r600_buffer_is_user_buffer(vb->buffer)) {
- struct pipe_resource *upload_buffer = NULL;
- unsigned offset = 0; /*vb->buffer_offset * 4;*/
- unsigned size = vb->buffer->width0;
- unsigned upload_offset;
- ret = u_upload_buffer(rctx->upload_vb,
- offset, size,
- vb->buffer,
- &upload_offset, &upload_buffer);
- if (ret)
- return ret;
-
- pipe_resource_reference(&vb->buffer, NULL);
- vb->buffer = upload_buffer;
- vb->buffer_offset = upload_offset;
- }
+ u_upload_data(rctx->vbuf_mgr->uploader, 0, size, ptr, const_offset,
+ (struct pipe_resource**)rbuffer, &flushed);
+ } else {
+ *const_offset = 0;
}
- return ret;
}
diff --git a/src/gallium/drivers/r600/r600_opcodes.h b/src/gallium/drivers/r600/r600_opcodes.h
index 2ee0c83e5d..a85d0bbf1e 100644
--- a/src/gallium/drivers/r600/r600_opcodes.h
+++ b/src/gallium/drivers/r600/r600_opcodes.h
@@ -330,10 +330,14 @@
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED_64 0x00000098
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_64 0x00000099
/* TODO Fill in more ALU */
+#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT 0x0000009B
+#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT 0x0000009C
+/* TODO Fill in more ALU */
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR 0x000000B1
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 0x000000BE
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE 0x000000BF
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE 0x000000C0
+#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4 0x000000C1
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT 0x000000CC
#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY 0x000000D6
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 6842571044..0e28bda6eb 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -30,12 +30,13 @@
#include <tgsi/tgsi_util.h>
#include <util/u_blitter.h>
#include <util/u_double_list.h>
+#include <util/u_format_s3tc.h>
#include <util/u_transfer.h>
#include <util/u_surface.h>
#include <util/u_pack_color.h>
#include <util/u_memory.h>
#include <util/u_inlines.h>
-#include <util/u_upload_mgr.h>
+#include "util/u_upload_mgr.h"
#include <pipebuffer/pb_buffer.h>
#include "r600.h"
#include "r600d.h"
@@ -47,7 +48,7 @@
/*
* pipe_context
*/
-static void r600_flush(struct pipe_context *ctx, unsigned flags,
+static void r600_flush(struct pipe_context *ctx,
struct pipe_fence_handle **fence)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
@@ -59,9 +60,6 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags,
if (!rctx->ctx.pm4_cdwords)
return;
- u_upload_flush(rctx->upload_vb);
- u_upload_flush(rctx->upload_ib);
-
#if 0
sprintf(dname, "gallium-%08d.bof", dc);
if (dc < 20) {
@@ -71,6 +69,30 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags,
dc++;
#endif
r600_context_flush(&rctx->ctx);
+
+ /* XXX This shouldn't really be necessary, but removing it breaks some tests.
+ * Needless buffer reallocations may significantly increase memory consumption,
+ * so getting rid of this call is important. */
+ u_upload_flush(rctx->vbuf_mgr->uploader);
+}
+
+static void r600_update_num_contexts(struct r600_screen *rscreen, int diff)
+{
+ pipe_mutex_lock(rscreen->mutex_num_contexts);
+ if (diff > 0) {
+ rscreen->num_contexts++;
+
+ if (rscreen->num_contexts > 1)
+ util_slab_set_thread_safety(&rscreen->pool_buffers,
+ UTIL_SLAB_MULTITHREADED);
+ } else {
+ rscreen->num_contexts--;
+
+ if (rscreen->num_contexts <= 1)
+ util_slab_set_thread_safety(&rscreen->pool_buffers,
+ UTIL_SLAB_SINGLETHREADED);
+ }
+ pipe_mutex_unlock(rscreen->mutex_num_contexts);
}
static void r600_destroy_context(struct pipe_context *context)
@@ -79,8 +101,6 @@ static void r600_destroy_context(struct pipe_context *context)
rctx->context.delete_depth_stencil_alpha_state(&rctx->context, rctx->custom_dsa_flush);
- r600_end_vertex_translate(rctx);
-
r600_context_fini(&rctx->ctx);
util_blitter_destroy(rctx->blitter);
@@ -89,14 +109,11 @@ static void r600_destroy_context(struct pipe_context *context)
free(rctx->states[i]);
}
- u_upload_destroy(rctx->upload_vb);
- u_upload_destroy(rctx->upload_ib);
+ u_vbuf_mgr_destroy(rctx->vbuf_mgr);
+ util_slab_destroy(&rctx->pool_transfers);
- if (rctx->tran.translate_cache)
- translate_cache_destroy(rctx->tran.translate_cache);
+ r600_update_num_contexts(rctx->screen, -1);
- FREE(rctx->ps_resource);
- FREE(rctx->vs_resource);
FREE(rctx);
}
@@ -108,6 +125,9 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
if (rctx == NULL)
return NULL;
+
+ r600_update_num_contexts(rscreen, 1);
+
rctx->context.winsys = rscreen->screen.winsys;
rctx->context.screen = screen;
rctx->context.priv = priv;
@@ -123,6 +143,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
r600_init_query_functions(rctx);
r600_init_context_resource_functions(rctx);
r600_init_surface_functions(rctx);
+ rctx->context.draw_vbo = r600_draw_vbo;
switch (r600_get_family(rctx->radeon)) {
case CHIP_R600:
@@ -137,7 +158,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
case CHIP_RV730:
case CHIP_RV710:
case CHIP_RV740:
- rctx->context.draw_vbo = r600_draw_vbo;
r600_init_state_functions(rctx);
if (r600_context_init(&rctx->ctx, rctx->radeon)) {
r600_destroy_context(&rctx->context);
@@ -151,7 +171,9 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
case CHIP_CYPRESS:
case CHIP_HEMLOCK:
case CHIP_PALM:
- rctx->context.draw_vbo = evergreen_draw;
+ case CHIP_BARTS:
+ case CHIP_TURKS:
+ case CHIP_CAICOS:
evergreen_init_state_functions(rctx);
if (evergreen_context_init(&rctx->ctx, rctx->radeon)) {
r600_destroy_context(&rctx->context);
@@ -165,41 +187,23 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
return NULL;
}
- rctx->upload_ib = u_upload_create(&rctx->context, 32 * 1024, 16,
- PIPE_BIND_INDEX_BUFFER);
- if (rctx->upload_ib == NULL) {
- r600_destroy_context(&rctx->context);
- return NULL;
- }
+ util_slab_create(&rctx->pool_transfers,
+ sizeof(struct pipe_transfer), 64,
+ UTIL_SLAB_SINGLETHREADED);
- rctx->upload_vb = u_upload_create(&rctx->context, 128 * 1024, 16,
- PIPE_BIND_VERTEX_BUFFER);
- if (rctx->upload_vb == NULL) {
+ rctx->vbuf_mgr = u_vbuf_mgr_create(&rctx->context, 1024 * 1024, 256,
+ PIPE_BIND_VERTEX_BUFFER |
+ PIPE_BIND_INDEX_BUFFER |
+ PIPE_BIND_CONSTANT_BUFFER,
+ U_VERTEX_FETCH_DWORD_ALIGNED);
+ if (!rctx->vbuf_mgr) {
r600_destroy_context(&rctx->context);
return NULL;
}
rctx->blitter = util_blitter_create(&rctx->context);
if (rctx->blitter == NULL) {
- FREE(rctx);
- return NULL;
- }
-
- rctx->tran.translate_cache = translate_cache_create();
- if (rctx->tran.translate_cache == NULL) {
- FREE(rctx);
- return NULL;
- }
-
- rctx->vs_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state));
- if (!rctx->vs_resource) {
- FREE(rctx);
- return NULL;
- }
-
- rctx->ps_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state));
- if (!rctx->ps_resource) {
- FREE(rctx);
+ r600_destroy_context(&rctx->context);
return NULL;
}
@@ -209,8 +213,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
else
rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx);
- r600_blit_uncompress_depth_ptr = r600_blit_uncompress_depth;
-
return &rctx->context;
}
@@ -243,6 +245,9 @@ static const char *r600_get_family_name(enum radeon_family family)
case CHIP_CYPRESS: return "AMD CYPRESS";
case CHIP_HEMLOCK: return "AMD HEMLOCK";
case CHIP_PALM: return "AMD PALM";
+ case CHIP_BARTS: return "AMD BARTS";
+ case CHIP_TURKS: return "AMD TURKS";
+ case CHIP_CAICOS: return "AMD CAICOS";
default: return "AMD unknown";
}
}
@@ -275,18 +280,32 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
case PIPE_CAP_SM3:
case PIPE_CAP_TEXTURE_SWIZZLE:
- case PIPE_CAP_INDEP_BLEND_ENABLE:
case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
case PIPE_CAP_DEPTH_CLAMP:
case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ case PIPE_CAP_TGSI_INSTANCEID:
+ case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
return 1;
+ case PIPE_CAP_INDEP_BLEND_ENABLE:
+ /* R600 doesn't support per-MRT blends */
+ if (family == CHIP_R600)
+ return 0;
+ else
+ return 1;
/* Unsupported features (boolean caps). */
- case PIPE_CAP_TIMER_QUERY:
case PIPE_CAP_STREAM_OUTPUT:
case PIPE_CAP_PRIMITIVE_RESTART:
case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */
- return 0;
+ /* Not supported on any family yet: both branches below return 0 */
+ if (family == CHIP_R600)
+ return 0;
+ else
+ return 0;
+
+ case PIPE_CAP_ARRAY_TEXTURES:
+ /* fix once the CS checker upstream is fixed */
+ return debug_get_bool_option("R600_ARRAY_TEXTURE", FALSE);
/* Texturing. */
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
@@ -316,6 +335,10 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
return 0;
+ /* Timer queries, present when the clock frequency is non zero. */
+ case PIPE_CAP_TIMER_QUERY:
+ return r600_get_clock_crystal_freq(rscreen->radeon) != 0;
+
default:
R600_ERR("r600: unknown param %d\n", param);
return 0;
@@ -380,9 +403,9 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
case PIPE_SHADER_CAP_MAX_ADDRS:
return 1; //max native address registers/* FIXME Isn't this equal to TEMPS? */
case PIPE_SHADER_CAP_MAX_CONSTS:
- return 256; //max native parameters
+ return R600_MAX_CONST_BUFFER_SIZE;
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
- return 1;
+ return R600_MAX_CONST_BUFFERS;
case PIPE_SHADER_CAP_MAX_PREDS:
return 0; /* FIXME */
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
@@ -403,8 +426,7 @@ static boolean r600_is_format_supported(struct pipe_screen* screen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
- unsigned usage,
- unsigned geom_flags)
+ unsigned usage)
{
unsigned retval = 0;
if (target >= PIPE_MAX_TEXTURE_TYPES) {
@@ -417,7 +439,7 @@ static boolean r600_is_format_supported(struct pipe_screen* screen,
return FALSE;
if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
- r600_is_sampler_format_supported(format)) {
+ r600_is_sampler_format_supported(screen, format)) {
retval |= PIPE_BIND_SAMPLER_VIEW;
}
@@ -438,9 +460,14 @@ static boolean r600_is_format_supported(struct pipe_screen* screen,
retval |= PIPE_BIND_DEPTH_STENCIL;
}
- if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
- r600_is_vertex_format_supported(format))
- retval |= PIPE_BIND_VERTEX_BUFFER;
+ if (usage & PIPE_BIND_VERTEX_BUFFER) {
+ struct r600_screen *rscreen = (struct r600_screen *)screen;
+ enum radeon_family family = r600_get_family(rscreen->radeon);
+
+ if (r600_is_vertex_format_supported(format, family)) {
+ retval |= PIPE_BIND_VERTEX_BUFFER;
+ }
+ }
if (usage & PIPE_BIND_TRANSFER_READ)
retval |= PIPE_BIND_TRANSFER_READ;
@@ -459,6 +486,8 @@ static void r600_destroy_screen(struct pipe_screen* pscreen)
radeon_decref(rscreen->radeon);
+ util_slab_destroy(&rscreen->pool_buffers);
+ pipe_mutex_destroy(rscreen->mutex_num_contexts);
FREE(rscreen);
}
@@ -485,6 +514,13 @@ struct pipe_screen *r600_screen_create(struct radeon *radeon)
r600_init_screen_resource_functions(&rscreen->screen);
rscreen->tiling_info = r600_get_tiling_info(radeon);
+ util_format_s3tc_init();
+
+ util_slab_create(&rscreen->pool_buffers,
+ sizeof(struct r600_resource_buffer), 64,
+ UTIL_SLAB_SINGLETHREADED);
+
+ pipe_mutex_init(rscreen->mutex_num_contexts);
return &rscreen->screen;
}
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 485f42166d..396801e4a4 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -30,12 +30,16 @@
#include <pipe/p_screen.h>
#include <pipe/p_context.h>
#include <util/u_math.h>
-#include "translate/translate_cache.h"
+#include "util/u_slab.h"
+#include "util/u_vbuf_mgr.h"
#include "r600.h"
#include "r600_public.h"
#include "r600_shader.h"
#include "r600_resource.h"
+#define R600_MAX_CONST_BUFFERS 1
+#define R600_MAX_CONST_BUFFER_SIZE 4096
+
enum r600_pipe_state_id {
R600_PIPE_STATE_BLEND = 0,
R600_PIPE_STATE_BLEND_COLOR,
@@ -62,6 +66,11 @@ struct r600_screen {
struct pipe_screen screen;
struct radeon *radeon;
struct r600_tiling_info *tiling_info;
+ struct util_slab_mempool pool_buffers;
+ unsigned num_contexts;
+
+ /* for thread-safe write access to num_contexts */
+ pipe_mutex mutex_num_contexts;
};
struct r600_pipe_sampler_view {
@@ -86,9 +95,7 @@ struct r600_vertex_element
{
unsigned count;
struct pipe_vertex_element elements[PIPE_MAX_ATTRIBS];
- enum pipe_format hw_format[PIPE_MAX_ATTRIBS];
- unsigned hw_format_size[PIPE_MAX_ATTRIBS];
- boolean incompatible_layout;
+ struct u_vbuf_mgr_elements *vmgr_elements;
struct r600_bo *fetch_shader;
unsigned fs_size;
struct r600_pipe_state rstate;
@@ -111,30 +118,18 @@ struct r600_pipe_shader {
#define NUM_TEX_UNITS 16
struct r600_textures_info {
- struct r600_pipe_sampler_view *views[NUM_TEX_UNITS];
+ struct r600_pipe_sampler_view *views[NUM_TEX_UNITS];
unsigned n_views;
void *samplers[NUM_TEX_UNITS];
unsigned n_samplers;
};
-/* vertex buffer translation context, used to translate vertex input that
- * hw doesn't natively support, so far only FLOAT64 is unsupported.
- */
-struct r600_translate_context {
- /* Translate cache for incompatible vertex offset/stride/format fallback. */
- struct translate_cache *translate_cache;
- /* The vertex buffer slot containing the translated buffer. */
- unsigned vb_slot;
- void *new_velems;
-};
-
#define R600_CONSTANT_ARRAY_SIZE 256
#define R600_RESOURCE_ARRAY_SIZE 160
struct r600_pipe_context {
struct pipe_context context;
struct blitter_context *blitter;
- struct pipe_framebuffer_state *pframebuffer;
unsigned family;
void *custom_dsa_flush;
struct r600_screen *screen;
@@ -142,43 +137,35 @@ struct r600_pipe_context {
struct r600_pipe_state *states[R600_PIPE_NSTATES];
struct r600_context ctx;
struct r600_vertex_element *vertex_elements;
+ struct r600_pipe_state fs_resource[PIPE_MAX_ATTRIBS];
struct pipe_framebuffer_state framebuffer;
struct pipe_index_buffer index_buffer;
- struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
- unsigned nvertex_buffer;
unsigned cb_target_mask;
/* for saving when using blitter */
struct pipe_stencil_ref stencil_ref;
struct pipe_viewport_state viewport;
struct pipe_clip_state clip;
- unsigned nvs_resource;
- struct r600_pipe_state *vs_resource;
- struct r600_pipe_state *ps_resource;
struct r600_pipe_state config;
struct r600_pipe_shader *ps_shader;
struct r600_pipe_shader *vs_shader;
struct r600_pipe_state vs_const_buffer;
+ struct r600_pipe_state vs_const_buffer_resource[R600_MAX_CONST_BUFFERS];
struct r600_pipe_state ps_const_buffer;
+ struct r600_pipe_state ps_const_buffer_resource[R600_MAX_CONST_BUFFERS];
struct r600_pipe_rasterizer *rasterizer;
/* shader information */
unsigned sprite_coord_enable;
bool flatshade;
- struct u_upload_mgr *upload_vb;
- struct u_upload_mgr *upload_ib;
- unsigned any_user_vbs;
struct r600_textures_info ps_samplers;
- unsigned vb_max_index;
- struct r600_translate_context tran;
+
+ struct u_vbuf_mgr *vbuf_mgr;
+ struct util_slab_mempool pool_transfers;
+ bool blit;
};
struct r600_drawl {
+ struct pipe_draw_info info;
struct pipe_context *ctx;
- unsigned mode;
- unsigned min_index;
- unsigned max_index;
- unsigned index_bias;
- unsigned start;
- unsigned count;
unsigned index_size;
unsigned index_buffer_offset;
struct pipe_resource *index_buffer;
@@ -187,16 +174,21 @@ struct r600_drawl {
/* evergreen_state.c */
void evergreen_init_state_functions(struct r600_pipe_context *rctx);
void evergreen_init_config(struct r600_pipe_context *rctx);
-void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info);
void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader);
+void evergreen_fetch_shader(struct r600_vertex_element *ve);
void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx);
void evergreen_polygon_offset_update(struct r600_pipe_context *rctx);
-void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx);
+void evergreen_pipe_set_buffer_resource(struct r600_pipe_context *rctx,
+ struct r600_pipe_state *rstate,
+ struct r600_resource *rbuffer,
+ unsigned offset, unsigned stride);
/* r600_blit.c */
void r600_init_blit_functions(struct r600_pipe_context *rctx);
-int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture);
+void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture);
+void r600_blit_push_depth(struct pipe_context *ctx, struct r600_resource_texture *texture);
+void r600_flush_depth_textures(struct r600_pipe_context *rctx);
/* r600_buffer.c */
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
@@ -204,13 +196,9 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen,
void *ptr, unsigned bytes,
unsigned bind);
-unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context,
- struct pipe_resource *buf,
- unsigned level, int layer);
struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen,
struct winsys_handle *whandle);
-int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw);
-int r600_upload_user_buffers(struct r600_pipe_context *rctx);
+void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw);
/* r600_query.c */
void r600_init_query_functions(struct r600_pipe_context *rctx);
@@ -219,7 +207,6 @@ void r600_init_query_functions(struct r600_pipe_context *rctx);
void r600_init_context_resource_functions(struct r600_pipe_context *r600);
/* r600_shader.c */
-int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader);
int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens);
void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader);
int r600_find_vs_semantic_index(struct r600_shader *vs,
@@ -227,11 +214,17 @@ int r600_find_vs_semantic_index(struct r600_shader *vs,
/* r600_state.c */
void r600_init_state_functions(struct r600_pipe_context *rctx);
-void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info);
+void r600_spi_update(struct r600_pipe_context *rctx);
void r600_init_config(struct r600_pipe_context *rctx);
+void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader);
+void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader);
+void r600_fetch_shader(struct r600_vertex_element *ve);
void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx);
void r600_polygon_offset_update(struct r600_pipe_context *rctx);
-void r600_vertex_buffer_update(struct r600_pipe_context *rctx);
+void r600_pipe_set_buffer_resource(struct r600_pipe_context *rctx,
+ struct r600_pipe_state *rstate,
+ struct r600_resource *rbuffer,
+ unsigned offset, unsigned stride);
/* r600_helper.h */
int r600_conv_pipe_prim(unsigned pprim, unsigned *prim);
@@ -239,15 +232,13 @@ int r600_conv_pipe_prim(unsigned pprim, unsigned *prim);
/* r600_texture.c */
void r600_init_screen_texture_functions(struct pipe_screen *screen);
void r600_init_surface_functions(struct r600_pipe_context *r600);
-uint32_t r600_translate_texformat(enum pipe_format format,
- const unsigned char *swizzle_view,
+uint32_t r600_translate_texformat(struct pipe_screen *screen, enum pipe_format format,
+ const unsigned char *swizzle_view,
uint32_t *word4_p, uint32_t *yuv_format_p);
unsigned r600_texture_get_offset(struct r600_resource_texture *rtex,
unsigned level, unsigned layer);
/* r600_translate.c */
-void r600_begin_vertex_translate(struct r600_pipe_context *rctx);
-void r600_end_vertex_translate(struct r600_pipe_context *rctx);
void r600_translate_index_buffer(struct r600_pipe_context *r600,
struct pipe_resource **index_buffer,
unsigned *index_size,
@@ -270,13 +261,16 @@ void r600_sampler_view_destroy(struct pipe_context *ctx,
void r600_bind_state(struct pipe_context *ctx, void *state);
void r600_delete_state(struct pipe_context *ctx, void *state);
void r600_bind_vertex_elements(struct pipe_context *ctx, void *state);
-
void *r600_create_shader_state(struct pipe_context *ctx,
const struct pipe_shader_state *state);
void r600_bind_ps_shader(struct pipe_context *ctx, void *state);
void r600_bind_vs_shader(struct pipe_context *ctx, void *state);
void r600_delete_ps_shader(struct pipe_context *ctx, void *state);
void r600_delete_vs_shader(struct pipe_context *ctx, void *state);
+void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
+ struct pipe_resource *buffer);
+void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info);
+
/*
* common helpers
*/
diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c
index 726668260c..181ea3f9e4 100644
--- a/src/gallium/drivers/r600/r600_query.c
+++ b/src/gallium/drivers/r600/r600_query.c
@@ -21,6 +21,7 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "r600_pipe.h"
+#include "r600d.h"
static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type)
{
@@ -61,11 +62,35 @@ static boolean r600_get_query_result(struct pipe_context *ctx,
struct r600_query *rquery = (struct r600_query *)query;
if (rquery->num_results) {
- ctx->flush(ctx, 0, NULL);
+ ctx->flush(ctx, NULL);
}
return r600_context_query_result(&rctx->ctx, (struct r600_query *)query, wait, vresult);
}
+static void r600_render_condition(struct pipe_context *ctx,
+ struct pipe_query *query,
+ uint mode)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_query *rquery = (struct r600_query *)query;
+ int wait_flag = 0;
+
+ if (!query) {
+ rctx->ctx.predicate_drawing = false;
+ r600_query_predication(&rctx->ctx, NULL, PREDICATION_OP_CLEAR, 1);
+ return;
+ }
+
+ if (mode == PIPE_RENDER_COND_WAIT ||
+ mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
+ wait_flag = 1;
+ }
+
+ rctx->ctx.predicate_drawing = true;
+ r600_query_predication(&rctx->ctx, rquery, PREDICATION_OP_ZPASS, wait_flag);
+
+}
+
void r600_init_query_functions(struct r600_pipe_context *rctx)
{
rctx->context.create_query = r600_create_query;
@@ -73,4 +98,7 @@ void r600_init_query_functions(struct r600_pipe_context *rctx)
rctx->context.begin_query = r600_begin_query;
rctx->context.end_query = r600_end_query;
rctx->context.get_query_result = r600_get_query_result;
+
+ if (r600_get_num_backends(rctx->screen->radeon) > 0)
+ rctx->context.render_condition = r600_render_condition;
}
diff --git a/src/gallium/drivers/r600/r600_resource.c b/src/gallium/drivers/r600/r600_resource.c
index 207642ccfa..f3ab3613c8 100644
--- a/src/gallium/drivers/r600/r600_resource.c
+++ b/src/gallium/drivers/r600/r600_resource.c
@@ -61,5 +61,4 @@ void r600_init_context_resource_functions(struct r600_pipe_context *r600)
r600->context.transfer_unmap = u_transfer_unmap_vtbl;
r600->context.transfer_destroy = u_transfer_destroy_vtbl;
r600->context.transfer_inline_write = u_transfer_inline_write_vtbl;
- r600->context.is_resource_referenced = u_is_resource_referenced_vtbl;
}
diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h
index 25aa84682c..836e7491f1 100644
--- a/src/gallium/drivers/r600/r600_resource.h
+++ b/src/gallium/drivers/r600/r600_resource.h
@@ -24,6 +24,7 @@
#define R600_RESOURCE_H
#include "util/u_transfer.h"
+#include "util/u_vbuf_mgr.h"
/* flag to indicate a resource is to be used as a transfer so should not be tiled */
#define R600_RESOURCE_FLAG_TRANSFER PIPE_RESOURCE_FLAG_DRV_PRIV
@@ -43,25 +44,45 @@ struct r600_transfer {
* underlying implementations.
*/
struct r600_resource {
- struct u_resource base;
+ struct u_vbuf_resource b;
struct r600_bo *bo;
u32 size;
+ unsigned bo_size;
};
struct r600_resource_texture {
struct r600_resource resource;
unsigned offset[PIPE_MAX_TEXTURE_LEVELS];
- unsigned pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS];
- unsigned pitch_in_pixels[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS]; /* transfer */
+ unsigned pitch_in_blocks[PIPE_MAX_TEXTURE_LEVELS]; /* texture resource */
unsigned layer_size[PIPE_MAX_TEXTURE_LEVELS];
unsigned array_mode[PIPE_MAX_TEXTURE_LEVELS];
unsigned pitch_override;
unsigned size;
- unsigned tiled;
unsigned tile_type;
unsigned depth;
- unsigned dirty;
- struct r600_resource_texture *flushed_depth_texture;
+ unsigned dirty_db;
+ struct r600_resource_texture *flushed_depth_texture;
+ boolean is_flushing_texture;
+
+ /* on some cards we have to use integer 64/128-bit types
+ for s3tc blits, do this until gallium grows int formats */
+ boolean force_int_type;
+};
+
+#define R600_TEX_IS_TILED(tex, level) ((tex)->array_mode[level] != V_038000_ARRAY_LINEAR_GENERAL && (tex)->array_mode[level] != V_038000_ARRAY_LINEAR_ALIGNED)
+
+#define R600_BUFFER_MAGIC 0xabcd1600
+
+/* XXX this could be removed */
+struct r600_resource_buffer {
+ struct r600_resource r;
+ uint32_t magic;
+};
+
+struct r600_surface {
+ struct pipe_surface base;
+ unsigned aligned_height;
};
void r600_init_screen_resource_functions(struct pipe_screen *screen);
@@ -73,41 +94,17 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
const struct pipe_resource *base,
struct winsys_handle *whandle);
-#define R600_BUFFER_MAGIC 0xabcd1600
-#define R600_BUFFER_MAX_RANGES 32
-
-struct r600_buffer_range {
- uint32_t start;
- uint32_t end;
-};
-
-struct r600_resource_buffer {
- struct r600_resource r;
- uint32_t magic;
- void *user_buffer;
- struct r600_buffer_range ranges[R600_BUFFER_MAX_RANGES];
- unsigned num_ranges;
-};
-
/* r600_buffer */
static INLINE struct r600_resource_buffer *r600_buffer(struct pipe_resource *buffer)
{
if (buffer) {
assert(((struct r600_resource_buffer *)buffer)->magic == R600_BUFFER_MAGIC);
return (struct r600_resource_buffer *)buffer;
- }
- return NULL;
-}
-
-static INLINE boolean r600_buffer_is_user_buffer(struct pipe_resource *buffer)
-{
- return r600_buffer(buffer)->user_buffer ? TRUE : FALSE;
+ }
+ return NULL;
}
-int r600_texture_depth_flush(struct pipe_context *ctx,
- struct pipe_resource *texture);
-
-extern int (*r600_blit_uncompress_depth_ptr)(struct pipe_context *ctx, struct r600_resource_texture *texture);
+int r600_texture_depth_flush(struct pipe_context *ctx, struct pipe_resource *texture, boolean just_create);
/* r600_texture.c texture transfer functions. */
struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
@@ -122,9 +119,8 @@ void* r600_texture_transfer_map(struct pipe_context *ctx,
void r600_texture_transfer_unmap(struct pipe_context *ctx,
struct pipe_transfer* transfer);
-struct r600_surface {
- struct pipe_surface base;
- unsigned aligned_height;
-};
+struct r600_pipe_context;
+
+void r600_upload_const_buffer(struct r600_pipe_context *rctx, struct r600_resource_buffer **rbuffer, uint32_t *offset);
#endif
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index d6455023a3..e7285d624e 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -28,60 +28,12 @@
#include "r600_pipe.h"
#include "r600_asm.h"
#include "r600_sq.h"
+#include "r600_formats.h"
#include "r600_opcodes.h"
#include "r600d.h"
#include <stdio.h>
#include <errno.h>
-static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
-{
- struct r600_pipe_state *rstate = &shader->rstate;
- struct r600_shader *rshader = &shader->shader;
- unsigned spi_vs_out_id[10];
- unsigned i, tmp;
-
- /* clear previous register */
- rstate->nregs = 0;
-
- /* so far never got proper semantic id from tgsi */
- /* FIXME better to move this in config things so they get emited
- * only one time per cs
- */
- for (i = 0; i < 10; i++) {
- spi_vs_out_id[i] = 0;
- }
- for (i = 0; i < 32; i++) {
- tmp = i << ((i & 3) * 8);
- spi_vs_out_id[i / 4] |= tmp;
- }
- for (i = 0; i < 10; i++) {
- r600_pipe_state_add_reg(rstate,
- R_028614_SPI_VS_OUT_ID_0 + i * 4,
- spi_vs_out_id[i], 0xFFFFFFFF, NULL);
- }
-
- r600_pipe_state_add_reg(rstate,
- R_0286C4_SPI_VS_OUT_CONFIG,
- S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_028868_SQ_PGM_RESOURCES_VS,
- S_028868_NUM_GPRS(rshader->bc.ngpr) |
- S_028868_STACK_SIZE(rshader->bc.nstack),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_0288D0_SQ_PGM_CF_OFFSET_VS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_028858_SQ_PGM_START_VS,
- r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
-
- r600_pipe_state_add_reg(rstate,
- R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
- 0xFFFFFFFF, NULL);
-
-}
-
int r600_find_vs_semantic_index(struct r600_shader *vs,
struct r600_shader *ps, int id)
{
@@ -96,98 +48,7 @@ int r600_find_vs_semantic_index(struct r600_shader *vs,
return 0;
}
-static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
-{
- struct r600_pipe_state *rstate = &shader->rstate;
- struct r600_shader *rshader = &shader->shader;
- unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
- int pos_index = -1, face_index = -1;
-
- rstate->nregs = 0;
-
- for (i = 0; i < rshader->ninput; i++) {
- if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
- pos_index = i;
- if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
- face_index = i;
- }
-
- for (i = 0; i < rshader->noutput; i++) {
- if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
- r600_pipe_state_add_reg(rstate,
- R_02880C_DB_SHADER_CONTROL,
- S_02880C_Z_EXPORT_ENABLE(1),
- S_02880C_Z_EXPORT_ENABLE(1), NULL);
- if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
- r600_pipe_state_add_reg(rstate,
- R_02880C_DB_SHADER_CONTROL,
- S_02880C_STENCIL_REF_EXPORT_ENABLE(1),
- S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL);
- }
-
- exports_ps = 0;
- num_cout = 0;
- for (i = 0; i < rshader->noutput; i++) {
- if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
- exports_ps |= 1;
- else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
- num_cout++;
- }
- }
- exports_ps |= S_028854_EXPORT_COLORS(num_cout);
- if (!exports_ps) {
- /* always at least export 1 component per pixel */
- exports_ps = 2;
- }
-
- spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
- S_0286CC_PERSP_GRADIENT_ENA(1);
- spi_input_z = 0;
- if (pos_index != -1) {
- spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
- S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
- S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
- S_0286CC_BARYC_SAMPLE_CNTL(1));
- spi_input_z |= 1;
- }
-
- spi_ps_in_control_1 = 0;
- if (face_index != -1) {
- spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
- S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
- }
-
- r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_028840_SQ_PGM_START_PS,
- r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
- r600_pipe_state_add_reg(rstate,
- R_028850_SQ_PGM_RESOURCES_PS,
- S_028868_NUM_GPRS(rshader->bc.ngpr) |
- S_028868_STACK_SIZE(rshader->bc.nstack),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_028854_SQ_PGM_EXPORTS_PS,
- exports_ps, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_0288CC_SQ_PGM_CF_OFFSET_PS,
- 0x00000000, 0xFFFFFFFF, NULL);
-
- if (rshader->uses_kill) {
- /* only set some bits here, the other bits are set in the dsa state */
- r600_pipe_state_add_reg(rstate,
- R_02880C_DB_SHADER_CONTROL,
- S_02880C_KILL_ENABLE(1),
- S_02880C_KILL_ENABLE(1), NULL);
- }
- r600_pipe_state_add_reg(rstate,
- R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
- 0xFFFFFFFF, NULL);
-}
-
-int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
+static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_shader *rshader = &shader->shader;
@@ -225,14 +86,23 @@ int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
return 0;
}
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
+static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
+
int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
{
+ static int dump_shaders = -1;
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
int r;
-//fprintf(stderr, "--------------------------------------------------------------\n");
-//tgsi_dump(tokens, 0);
+ /* Would like some magic "get_bool_option_once" routine.
+ */
+ if (dump_shaders == -1)
+ dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
+
+ if (dump_shaders) {
+ fprintf(stderr, "--------------------------------------------------------------\n");
+ tgsi_dump(tokens, 0);
+ }
shader->shader.family = r600_get_family(rctx->radeon);
r = r600_shader_from_tgsi(tokens, &shader->shader);
if (r) {
@@ -244,8 +114,10 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s
R600_ERR("building bytecode failed !\n");
return r;
}
-//r600_bc_dump(&shader->shader.bc);
-//fprintf(stderr, "______________________________________________________________\n");
+ if (dump_shaders) {
+ r600_bc_dump(&shader->shader.bc);
+ fprintf(stderr, "______________________________________________________________\n");
+ }
return r600_pipe_shader(ctx, shader);
}
@@ -262,6 +134,15 @@ void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader
*/
struct r600_shader_tgsi_instruction;
+struct r600_shader_src { /* one decoded TGSI source operand, as filled in by tgsi_src() */
+ unsigned sel; /* source select: register/constant index or a V_SQ_ALU_SRC_* value */
+ unsigned swizzle[4]; /* source component to read for each of the four channels */
+ unsigned neg; /* negate modifier */
+ unsigned abs; /* absolute-value modifier */
+ unsigned rel; /* V_SQ_REL_RELATIVE for indirectly addressed registers, else 0 */
+ uint32_t value[4]; /* raw immediate dwords, copied when sel is V_SQ_ALU_SRC_LITERAL */
+};
+
struct r600_shader_ctx {
struct tgsi_shader_info info;
struct tgsi_parse_context parse;
@@ -269,10 +150,11 @@ struct r600_shader_ctx {
unsigned type;
unsigned file_offset[TGSI_FILE_COUNT];
unsigned temp_reg;
+ unsigned ar_reg;
struct r600_shader_tgsi_instruction *inst_info;
struct r600_bc *bc;
struct r600_shader *shader;
- u32 value[4];
+ struct r600_shader_src src[3];
u32 *literals;
u32 nliterals;
u32 max_driver_temp_used;
@@ -391,6 +273,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
{
struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
unsigned i;
+ int r;
switch (d->Declaration.File) {
case TGSI_FILE_INPUT:
@@ -422,6 +305,26 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
case TGSI_FILE_SAMPLER:
case TGSI_FILE_ADDRESS:
break;
+
+ case TGSI_FILE_SYSTEM_VALUE:
+ if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
+ struct r600_bc_alu alu;
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
+ alu.src[0].sel = 0;
+ alu.src[0].chan = 3;
+
+ alu.dst.sel = 0;
+ alu.dst.chan = 3;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+ return r;
+ break;
+ }
+
default:
R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
return -EINVAL;
@@ -481,9 +384,187 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
return ctx->num_interp_gpr;
}
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
+static void tgsi_src(struct r600_shader_ctx *ctx,
+ const struct tgsi_full_src_register *tgsi_src,
+ struct r600_shader_src *r600_src)
+{ /* Decode one TGSI source register into *r600_src; every field of *r600_src is overwritten. */
+ memset(r600_src, 0, sizeof(*r600_src));
+ r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
+ r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
+ r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
+ r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
+ r600_src->neg = tgsi_src->Register.Negate;
+ r600_src->abs = tgsi_src->Register.Absolute;
+
+ if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
+ int index;
+ if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
+ (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
+ (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { /* all channels read the same immediate component */
+
+ index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; /* dword index of that component in ctx->literals */
+ r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); /* presumably maps well-known values to a special source select; may update sel and neg */
+ if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) /* a non-literal select was chosen, so no literal data is needed */
+ return;
+ }
+ index = tgsi_src->Register.Index;
+ r600_src->sel = V_SQ_ALU_SRC_LITERAL;
+ memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); /* keep all four dwords of this immediate */
+ } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
+ /* assume we want TGSI_SEMANTIC_INSTANCEID here */
+ r600_src->swizzle[0] = 3; /* every channel reads component 3 ... */
+ r600_src->swizzle[1] = 3;
+ r600_src->swizzle[2] = 3;
+ r600_src->swizzle[3] = 3;
+ r600_src->sel = 0; /* ... of register 0, the same sel/chan that tgsi_declaration() writes for INSTANCEID */
+ } else {
+ if (tgsi_src->Register.Indirect)
+ r600_src->rel = V_SQ_REL_RELATIVE;
+ r600_src->sel = tgsi_src->Register.Index;
+ r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; /* add the per-register-file base offset */
+ }
+}
+
+static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
+{ /* Emit a VTX fetch into GPR dst_reg addressed by ctx->ar_reg (+ offset); returns 0 or the r600_bc_add_* error. */
+ struct r600_bc_vtx vtx;
+ unsigned int ar_reg; /* GPR that ends up holding the fetch address */
+ int r;
+
+ if (offset) { /* non-zero offset: first compute ar_reg + offset into dst_reg */
+ struct r600_bc_alu alu;
+
+ memset(&alu, 0, sizeof(alu));
+
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
+ alu.src[0].sel = ctx->ar_reg;
+
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = offset;
+
+ alu.dst.sel = dst_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+ return r;
+
+ ar_reg = dst_reg; /* the summed address now lives in dst_reg */
+ } else {
+ ar_reg = ctx->ar_reg; /* zero offset: use the address register as-is */
+ }
+
+ memset(&vtx, 0, sizeof(vtx));
+ vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
+ vtx.src_gpr = ar_reg;
+ vtx.mega_fetch_count = 16; /* presumably bytes per fetch (four 32-bit components) -- TODO confirm */
+ vtx.dst_gpr = dst_reg;
+ vtx.dst_sel_x = 0; /* SEL_X */
+ vtx.dst_sel_y = 1; /* SEL_Y */
+ vtx.dst_sel_z = 2; /* SEL_Z */
+ vtx.dst_sel_w = 3; /* SEL_W */
+ vtx.data_format = FMT_32_32_32_32_FLOAT;
+ vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
+ vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
+ vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
+
+ if ((r = r600_bc_add_vtx(ctx->bc, &vtx)))
+ return r;
+
+ return 0;
+}
+
+static int tgsi_split_constant(struct r600_shader_ctx *ctx)
+{ /* Decode all sources into ctx->src[] and copy constants to temps so at most one operand still reads a constant directly. */
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int i, j, k, nconst, r;
+
+ for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { /* pass 1: count constant sources and decode every source */
+ if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
+ nconst++;
+ }
+ tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
+ }
+ for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { /* pass 2: j = constants that still have to be moved out */
+ if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
+ continue;
+ }
+
+ if (ctx->src[i].rel) { /* indirectly addressed constants are always fetched into a temp */
+ int treg = r600_get_temp(ctx);
+ if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg))) /* 512 is file_offset[TGSI_FILE_CONSTANT]; subtracting it recovers the constant index */
+ return r;
+
+ ctx->src[i].sel = treg;
+ ctx->src[i].rel = 0;
+ j--;
+ } else if (j > 0) { /* direct constant, but more constants follow: copy it to a temp */
+ int treg = r600_get_temp(ctx);
+ for (k = 0; k < 4; k++) { /* one MOV per channel, emitted as a single ALU group */
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
+ alu.src[0].sel = ctx->src[i].sel;
+ alu.src[0].chan = k;
+ alu.src[0].rel = ctx->src[i].rel; /* always 0 here: relative sources took the branch above */
+ alu.dst.sel = treg;
+ alu.dst.chan = k;
+ alu.dst.write = 1;
+ if (k == 3)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ ctx->src[i].sel = treg;
+ ctx->src[i].rel =0;
+ j--;
+ }
+ }
+ return 0;
+}
+
+/* Move every literal source except the last one into a temp, so at most one operand still uses a literal - needed for trig functions, which use a literal for PI stuff. Relies on ctx->src[] already being filled. */
+static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int i, j, k, nliteral, r;
+
+ for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { /* pass 1: count sources that resolved to a literal */
+ if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
+ nliteral++;
+ }
+ }
+ for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { /* pass 2: j = literals that still have to be moved out */
+ if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
+ int treg = r600_get_temp(ctx);
+ for (k = 0; k < 4; k++) { /* one MOV per channel, emitted as a single ALU group */
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
+ alu.src[0].sel = ctx->src[i].sel;
+ alu.src[0].chan = k;
+ alu.src[0].value = ctx->src[i].value[k]; /* unswizzled copy; the operand's swizzle is applied later to the temp */
+ alu.dst.sel = treg;
+ alu.dst.chan = k;
+ alu.dst.write = 1;
+ if (k == 3)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ ctx->src[i].sel = treg; /* operand now reads the temp instead of the literal */
+ j--;
+ }
+ }
+ return 0;
+}
+
+static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
{
struct tgsi_full_immediate *immediate;
+ struct tgsi_full_property *property;
struct r600_shader_ctx ctx;
struct r600_bc_output output[32];
unsigned output_done, noutput;
@@ -506,7 +587,9 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
/* Values [0,127] correspond to GPR[0..127].
* Values [128,159] correspond to constant buffer bank 0
* Values [160,191] correspond to constant buffer bank 1
- * Values [256,511] correspond to cfile constants c[0..255].
+ * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
+ * Values [256,287] correspond to constant buffer bank 2 (EG)
+ * Values [288,319] correspond to constant buffer bank 3 (EG)
* Other special values are shown in the list below.
* 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
* 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
@@ -540,15 +623,18 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
ctx.info.file_count[TGSI_FILE_OUTPUT];
- ctx.file_offset[TGSI_FILE_CONSTANT] = 128;
+ /* Outside the GPR range. This will be translated to one of the
+ * kcache banks later. */
+ ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
- ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
- ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
+ ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
+ ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
ctx.info.file_count[TGSI_FILE_TEMPORARY];
+ ctx.temp_reg = ctx.ar_reg + 1;
ctx.nliterals = 0;
ctx.literals = NULL;
-
+ shader->fs_write_all = FALSE;
while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
tgsi_parse_token(&ctx.parse);
switch (ctx.parse.FullToken.Token.Type) {
@@ -577,7 +663,12 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
ctx.max_driver_temp_used = 0;
/* reserve first tmp for everyone */
r600_get_temp(&ctx);
+
opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
+ if ((r = tgsi_split_constant(&ctx)))
+ goto out_err;
+ if ((r = tgsi_split_literal_constant(&ctx)))
+ goto out_err;
if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
else
@@ -585,9 +676,13 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
r = ctx.inst_info->process(&ctx);
if (r)
goto out_err;
- r = r600_bc_add_literal(ctx.bc, ctx.value);
- if (r)
- goto out_err;
+ break;
+ case TGSI_TOKEN_TYPE_PROPERTY:
+ property = &ctx.parse.FullToken.FullProperty;
+ if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
+ if (property->u[0].Data == 1)
+ shader->fs_write_all = TRUE;
+ }
break;
default:
R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
@@ -605,6 +700,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
output[i].swizzle_y = 1;
output[i].swizzle_z = 2;
output[i].swizzle_w = 3;
+ output[i].burst_count = 1;
output[i].barrier = 1;
output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
output[i].array_base = i - pos0;
@@ -668,6 +764,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
output[i].swizzle_y = 1;
output[i].swizzle_z = 2;
output[i].swizzle_w = 3;
+ output[i].burst_count = 1;
output[i].barrier = 1;
output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
output[i].array_base = 0;
@@ -684,6 +781,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
output[0].swizzle_y = 7;
output[0].swizzle_z = 7;
output[0].swizzle_w = 7;
+ output[0].burst_count = 1;
output[0].barrier = 1;
output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
output[0].array_base = 0;
@@ -726,33 +824,22 @@ static int tgsi_end(struct r600_shader_ctx *ctx)
return 0;
}
-static int tgsi_src(struct r600_shader_ctx *ctx,
- const struct tgsi_full_src_register *tgsi_src,
- struct r600_bc_alu_src *r600_src)
+static void r600_bc_src(struct r600_bc_alu_src *bc_src,
+ const struct r600_shader_src *shader_src,
+ unsigned chan) /* chan: index into shader_src->swizzle[] selecting which source component to read */
{
- int index;
- memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
- r600_src->sel = tgsi_src->Register.Index;
- if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
- r600_src->sel = 0;
- index = tgsi_src->Register.Index;
- ctx->value[0] = ctx->literals[index * 4 + 0];
- ctx->value[1] = ctx->literals[index * 4 + 1];
- ctx->value[2] = ctx->literals[index * 4 + 2];
- ctx->value[3] = ctx->literals[index * 4 + 3];
- }
- if (tgsi_src->Register.Indirect)
- r600_src->rel = V_SQ_REL_RELATIVE;
- r600_src->neg = tgsi_src->Register.Negate;
- r600_src->abs = tgsi_src->Register.Absolute;
- r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
- return 0;
+ bc_src->sel = shader_src->sel; /* only the six fields below are written; any other bc_src field is left untouched */
+ bc_src->chan = shader_src->swizzle[chan]; /* apply the operand's swizzle */
+ bc_src->neg = shader_src->neg;
+ bc_src->abs = shader_src->abs;
+ bc_src->rel = shader_src->rel;
+ bc_src->value = shader_src->value[bc_src->chan]; /* literal dword for the swizzled component; value[] is zeroed by tgsi_src() for non-literal sources */
}
-static int tgsi_dst(struct r600_shader_ctx *ctx,
- const struct tgsi_full_dst_register *tgsi_dst,
- unsigned swizzle,
- struct r600_bc_alu_dst *r600_dst)
+static void tgsi_dst(struct r600_shader_ctx *ctx,
+ const struct tgsi_full_dst_register *tgsi_dst,
+ unsigned swizzle,
+ struct r600_bc_alu_dst *r600_dst)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -765,146 +852,42 @@ static int tgsi_dst(struct r600_shader_ctx *ctx,
if (inst->Instruction.Saturate) {
r600_dst->clamp = 1;
}
- return 0;
}
-static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
+static int tgsi_last_instruction(unsigned writemask) /* returns the index (0-3) of the highest channel set in writemask */
{
- switch (swizzle) {
- case 0:
- return tgsi_src->Register.SwizzleX;
- case 1:
- return tgsi_src->Register.SwizzleY;
- case 2:
- return tgsi_src->Register.SwizzleZ;
- case 3:
- return tgsi_src->Register.SwizzleW;
- default:
- return 0;
- }
-}
+ int i, lasti = 0; /* lasti stays 0 when none of bits 0-3 is set */
-static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
-{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu alu;
- int i, j, k, nconst, r;
-
- for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
- if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
- nconst++;
- }
- r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
- if (r) {
- return r;
- }
- }
- for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
- if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
- int treg = r600_get_temp(ctx);
- for (k = 0; k < 4; k++) {
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- alu.src[0].sel = r600_src[i].sel;
- alu.src[0].chan = k;
- alu.src[0].rel = r600_src[i].rel;
- alu.dst.sel = treg;
- alu.dst.chan = k;
- alu.dst.write = 1;
- if (k == 3)
- alu.last = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
- r600_src[i].sel = treg;
- r600_src[i].rel =0;
- j--;
- }
- }
- return 0;
-}
-
-/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
-static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
-{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu alu;
- int i, j, k, nliteral, r;
-
- for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
- if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
- nliteral++;
- }
- }
- for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
- if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
- int treg = r600_get_temp(ctx);
- for (k = 0; k < 4; k++) {
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- alu.src[0].sel = r600_src[i].sel;
- alu.src[0].chan = k;
- alu.dst.sel = treg;
- alu.dst.chan = k;
- alu.dst.write = 1;
- if (k == 3)
- alu.last = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
- r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]);
- if (r)
- return r;
- r600_src[i].sel = treg;
- j--;
+ for (i = 0; i < 4; i++) { /* ascending scan, so the last match is the highest set bit */
+ if (writemask & (1 << i)) {
+ lasti = i;
+ }
}
- return 0;
+ return lasti;
}
static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, j, r;
- int lasti = 0;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
- for (i = 0; i < 4; i++) {
- if (inst->Dst[0].Register.WriteMask & (1 << i)) {
- lasti = i;
- }
- }
-
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
for (i = 0; i < lasti + 1; i++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
continue;
memset(&alu, 0, sizeof(struct r600_bc_alu));
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.inst = ctx->inst_info->r600_opcode;
if (!swap) {
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
- alu.src[j] = r600_src[j];
- alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
+ r600_bc_src(&alu.src[j], &ctx->src[j], i);
}
} else {
- alu.src[0] = r600_src[1];
- alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
-
- alu.src[1] = r600_src[0];
- alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[0], &ctx->src[1], i);
+ r600_bc_src(&alu.src[1], &ctx->src[0], i);
}
/* handle some special cases */
switch (ctx->inst_info->tgsi_opcode) {
@@ -942,25 +925,15 @@ static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
* r700 - normalize by dividing by 2PI
* see fdo bug 27901
*/
-static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
- struct r600_bc_alu_src r600_src[3])
+static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ static float half_inv_pi = 1.0 /(3.1415926535 * 2);
+ static float double_pi = 3.1415926535 * 2;
+ static float neg_pi = -3.1415926535;
+
int r;
- uint32_t lit_vals[4];
struct r600_bc_alu alu;
- memset(lit_vals, 0, 4*4);
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
- lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
- lit_vals[1] = fui(0.5f);
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
alu.is_op3 = 1;
@@ -969,20 +942,17 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[1].chan = 0;
- alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
- alu.src[2].chan = 1;
+ alu.src[1].value = *(uint32_t *)&half_inv_pi;
+ alu.src[2].sel = V_SQ_ALU_SRC_0_5;
+ alu.src[2].chan = 0;
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, lit_vals);
- if (r)
- return r;
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
@@ -998,14 +968,6 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
if (r)
return r;
- if (ctx->bc->chiprev == CHIPREV_R600) {
- lit_vals[0] = fui(3.1415926535897f * 2.0f);
- lit_vals[1] = fui(-3.1415926535897f);
- } else {
- lit_vals[0] = fui(1.0f);
- lit_vals[1] = fui(-0.5f);
- }
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
alu.is_op3 = 1;
@@ -1020,26 +982,32 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[1].chan = 0;
alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
- alu.src[2].chan = 1;
+ alu.src[2].chan = 0;
+
+ if (ctx->bc->chiprev == CHIPREV_R600) {
+ alu.src[1].value = *(uint32_t *)&double_pi;
+ alu.src[2].value = *(uint32_t *)&neg_pi;
+ } else {
+ alu.src[1].sel = V_SQ_ALU_SRC_1;
+ alu.src[2].sel = V_SQ_ALU_SRC_0_5;
+ alu.src[2].neg = 1;
+ }
+
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, lit_vals);
- if (r)
- return r;
return 0;
}
static int tgsi_trig(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, r;
- int lasti = 0;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
- r = tgsi_setup_trig(ctx, r600_src);
+ r = tgsi_setup_trig(ctx);
if (r)
return r;
@@ -1057,10 +1025,6 @@ static int tgsi_trig(struct r600_shader_ctx *ctx)
return r;
/* replicate result */
- for (i = 0; i < 4; i++) {
- if (inst->Dst[0].Register.WriteMask & (1 << i))
- lasti = i;
- }
for (i = 0; i < lasti + 1; i++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
continue;
@@ -1069,9 +1033,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
alu.src[0].sel = ctx->temp_reg;
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
if (i == lasti)
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
@@ -1084,7 +1046,6 @@ static int tgsi_trig(struct r600_shader_ctx *ctx)
static int tgsi_scs(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int r;
@@ -1092,7 +1053,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
* X or Y components of the destination vector.
*/
if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
- r = tgsi_setup_trig(ctx, r600_src);
+ r = tgsi_setup_trig(ctx);
if (r)
return r;
}
@@ -1101,9 +1062,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
- r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 0;
@@ -1117,9 +1076,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
- r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 0;
@@ -1135,9 +1092,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
alu.src[0].sel = V_SQ_ALU_SRC_0;
alu.src[0].chan = 0;
@@ -1147,10 +1102,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* dst.w = 1.0; */
@@ -1159,9 +1110,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
alu.src[0].sel = V_SQ_ALU_SRC_1;
alu.src[0].chan = 0;
@@ -1171,10 +1120,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
return 0;
@@ -1182,7 +1127,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
static int tgsi_kill(struct r600_shader_ctx *ctx)
{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int i, r;
@@ -1198,10 +1142,7 @@ static int tgsi_kill(struct r600_shader_ctx *ctx)
alu.src[1].sel = V_SQ_ALU_SRC_1;
alu.src[1].neg = 1;
} else {
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
- if (r)
- return r;
- alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[1], &ctx->src[0], i);
}
if (i == 3) {
alu.last = 1;
@@ -1210,9 +1151,6 @@ static int tgsi_kill(struct r600_shader_ctx *ctx)
if (r)
return r;
}
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* kill must be last in ALU */
ctx->bc->force_add_cf = 1;
@@ -1224,24 +1162,14 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
- struct r600_bc_alu_src r600_src[3];
int r;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
/* dst.x, <- 1.0 */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
alu.src[0].chan = 0;
- r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
@@ -1250,12 +1178,10 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
/* dst.y = max(src.x, 0.0) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
- alu.src[0] = r600_src[0];
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
alu.src[1].chan = 0;
- r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
@@ -1266,19 +1192,13 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
alu.src[0].sel = V_SQ_ALU_SRC_1;
alu.src[0].chan = 0;
- r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
if (inst->Dst[0].Register.WriteMask & (1 << 2))
{
int chan;
@@ -1287,33 +1207,24 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
/* dst.z = log(src.y) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
- r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
- if (r)
- return r;
+ r600_bc_src(&alu.src[0], &ctx->src[0], 1);
+ tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
chan = alu.dst.chan;
sel = alu.dst.sel;
/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 3);
alu.src[1].sel = sel;
alu.src[1].chan = chan;
- alu.src[2] = r600_src[0];
- alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[2], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 0;
alu.dst.write = 1;
@@ -1323,17 +1234,12 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* dst.z = exp(tmp.x) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 0;
- r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
@@ -1357,10 +1263,7 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
- r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
- if (r)
- return r;
- alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
+ r600_bc_src(&alu.src[i], &ctx->src[i], 0);
alu.src[i].abs = 1;
}
alu.dst.sel = ctx->temp_reg;
@@ -1369,9 +1272,6 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* replicate result */
return tgsi_helper_tempx_replicate(ctx);
}
@@ -1387,9 +1287,7 @@ static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
alu.src[0].sel = ctx->temp_reg;
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
alu.dst.chan = i;
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
if (i == 3)
alu.last = 1;
@@ -1409,10 +1307,7 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = ctx->inst_info->r600_opcode;
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
- r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
- if (r)
- return r;
- alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
+ r600_bc_src(&alu.src[i], &ctx->src[i], 0);
}
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
@@ -1420,42 +1315,29 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* replicate result */
return tgsi_helper_tempx_replicate(ctx);
}
static int tgsi_pow(struct r600_shader_ctx *ctx)
{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int r;
/* LOG2(a) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc,ctx->value);
- if (r)
- return r;
/* b * LOG2(a) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE);
- r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
+ r600_bc_src(&alu.src[0], &ctx->src[1], 0);
alu.src[1].sel = ctx->temp_reg;
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
@@ -1463,9 +1345,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc,ctx->value);
- if (r)
- return r;
/* POW(a,b) = EXP2(b * LOG2(a))*/
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
@@ -1476,9 +1355,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc,ctx->value);
- if (r)
- return r;
return tgsi_helper_tempx_replicate(ctx);
}
@@ -1486,16 +1362,8 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
- struct r600_bc_alu_src r600_src[3];
int i, r;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
/* tmp = (src > 0 ? 1 : src) */
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
@@ -1505,31 +1373,23 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx)
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
-
+ r600_bc_src(&alu.src[0], &ctx->src[0], i);
alu.src[1].sel = V_SQ_ALU_SRC_1;
+ r600_bc_src(&alu.src[2], &ctx->src[0], i);
- alu.src[2] = r600_src[0];
- alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
if (i == 3)
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
}
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* dst = (-tmp > 0 ? -1 : tmp) */
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
alu.is_op3 = 1;
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = i;
@@ -1555,9 +1415,6 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru
struct r600_bc_alu alu;
int i, r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
@@ -1565,9 +1422,7 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru
alu.dst.chan = i;
} else {
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = i;
}
@@ -1584,61 +1439,50 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru
static int tgsi_op3(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, j, r;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
- /* do it in 2 step as op3 doesn't support writemask */
- for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = ctx->inst_info->r600_opcode;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
- alu.src[j] = r600_src[j];
- alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
+ r600_bc_src(&alu.src[j], &ctx->src[j], i);
}
- alu.dst.sel = ctx->temp_reg;
+
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
alu.dst.write = 1;
alu.is_op3 = 1;
- if (i == 3) {
+ if (i == lasti) {
alu.last = 1;
}
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
}
- return tgsi_helper_copy(ctx, inst);
+ return 0;
}
static int tgsi_dp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, j, r;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = ctx->inst_info->r600_opcode;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
- alu.src[j] = r600_src[j];
- alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
+ r600_bc_src(&alu.src[j], &ctx->src[j], i);
}
- alu.dst.sel = ctx->temp_reg;
+
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
- alu.dst.write = 1;
+ alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
/* handle some special cases */
switch (ctx->inst_info->tgsi_opcode) {
case TGSI_OPCODE_DP2:
@@ -1670,19 +1514,21 @@ static int tgsi_dp(struct r600_shader_ctx *ctx)
if (r)
return r;
}
- return tgsi_helper_copy(ctx, inst);
+ return 0;
}
static int tgsi_tex(struct r600_shader_ctx *ctx)
{
+ static float one_point_five = 1.5f;
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_tex tex;
struct r600_bc_alu alu;
unsigned src_gpr;
int r, i;
int opcode;
- boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY;
- uint32_t lit_vals[4];
+ boolean src_not_temp =
+ inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&
+ inst->Src[0].Register.File != TGSI_FILE_INPUT;
src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
@@ -1690,11 +1536,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
/* Add perspective divide */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
+ r600_bc_src(&alu.src[0], &ctx->src[0], 3);
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 3;
alu.last = 1;
@@ -1708,10 +1551,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 3;
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
- if (r)
- return r;
- alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[1], &ctx->src[0], i);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
alu.dst.write = 1;
@@ -1735,43 +1575,15 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
}
if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
- int src_chan, src2_chan;
+ static const unsigned src0_swizzle[] = {2, 2, 0, 1};
+ static const unsigned src1_swizzle[] = {1, 0, 2, 2};
/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
- switch (i) {
- case 0:
- src_chan = 2;
- src2_chan = 1;
- break;
- case 1:
- src_chan = 2;
- src2_chan = 0;
- break;
- case 2:
- src_chan = 0;
- src2_chan = 2;
- break;
- case 3:
- src_chan = 1;
- src2_chan = 2;
- break;
- default:
- assert(0);
- src_chan = 0;
- src2_chan = 0;
- break;
- }
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
- if (r)
- return r;
- alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
+ r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
+ r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
if (i == 3)
@@ -1811,6 +1623,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[2].chan = 0;
+ alu.src[2].value = *(uint32_t *)&one_point_five;
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 0;
@@ -1831,6 +1644,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[2].chan = 0;
+ alu.src[2].value = *(uint32_t *)&one_point_five;
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 1;
@@ -1841,11 +1655,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
if (r)
return r;
- lit_vals[0] = fui(1.5f);
-
- r = r600_bc_add_literal(ctx->bc, lit_vals);
- if (r)
- return r;
src_not_temp = FALSE;
src_gpr = ctx->temp_reg;
}
@@ -1854,8 +1663,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- alu.src[0].sel = src_gpr;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[0], &ctx->src[0], i);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
if (i == 3)
@@ -1876,7 +1684,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
memset(&tex, 0, sizeof(struct r600_bc_tex));
tex.inst = opcode;
tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
- tex.resource_id = tex.sampler_id;
+ tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
tex.src_gpr = src_gpr;
tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
@@ -1902,6 +1710,12 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
tex.coord_type_w = 1;
}
+ if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) {
+ tex.coord_type_z = 0;
+ tex.src_sel_z = 1;
+ } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY)
+ tex.coord_type_z = 0;
+
if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
tex.src_sel_w = 2;
@@ -1916,29 +1730,48 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
static int tgsi_lrp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
unsigned i;
int r;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
+ /* optimize if it's just an equal balance */
+ if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
+ r600_bc_src(&alu.src[0], &ctx->src[1], i);
+ r600_bc_src(&alu.src[1], &ctx->src[2], i);
+ alu.omod = 3;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.dst.chan = i;
+ if (i == lasti) {
+ alu.last = 1;
+ }
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+ }
+
/* 1 - src0 */
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
alu.src[0].sel = V_SQ_ALU_SRC_1;
alu.src[0].chan = 0;
- alu.src[1] = r600_src[0];
- alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[1], &ctx->src[0], i);
alu.src[1].neg = 1;
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
- if (i == 3) {
+ if (i == lasti) {
alu.last = 1;
}
alu.dst.write = 1;
@@ -1946,21 +1779,20 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
if (r)
return r;
}
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* (1 - src0) * src2 */
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = i;
- alu.src[1] = r600_src[2];
- alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
+ r600_bc_src(&alu.src[1], &ctx->src[2], i);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
- if (i == 3) {
+ if (i == lasti) {
alu.last = 1;
}
alu.dst.write = 1;
@@ -1968,88 +1800,66 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
if (r)
return r;
}
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* src0 * src1 + (1 - src0) * src2 */
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
alu.is_op3 = 1;
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
- alu.src[1] = r600_src[1];
- alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
+ r600_bc_src(&alu.src[0], &ctx->src[0], i);
+ r600_bc_src(&alu.src[1], &ctx->src[1], i);
alu.src[2].sel = ctx->temp_reg;
alu.src[2].chan = i;
- alu.dst.sel = ctx->temp_reg;
+
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
- if (i == 3) {
+ if (i == lasti) {
alu.last = 1;
}
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
}
- return tgsi_helper_copy(ctx, inst);
+ return 0;
}
static int tgsi_cmp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
- int use_temp = 0;
int i, r;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
- if (inst->Dst[0].Register.WriteMask != 0xf)
- use_temp = 1;
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
- for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
-
- alu.src[1] = r600_src[2];
- alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
-
- alu.src[2] = r600_src[1];
- alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
-
- if (use_temp)
- alu.dst.sel = ctx->temp_reg;
- else {
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
- }
+ r600_bc_src(&alu.src[0], &ctx->src[0], i);
+ r600_bc_src(&alu.src[1], &ctx->src[2], i);
+ r600_bc_src(&alu.src[2], &ctx->src[1], i);
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
alu.dst.write = 1;
alu.is_op3 = 1;
- if (i == 3)
+ if (i == lasti)
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
}
- if (use_temp)
- return tgsi_helper_copy(ctx, inst);
return 0;
}
static int tgsi_xpd(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
+ static const unsigned int src0_swizzle[] = {2, 0, 1};
+ static const unsigned int src1_swizzle[] = {1, 2, 0};
struct r600_bc_alu alu;
uint32_t use_temp = 0;
int i, r;
@@ -2057,45 +1867,15 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
if (inst->Dst[0].Register.WriteMask != 0xf)
use_temp = 1;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
-
- alu.src[0] = r600_src[0];
- switch (i) {
- case 0:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
- break;
- case 1:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
- break;
- case 2:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
- break;
- case 3:
+ if (i < 3) {
+ r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
+ r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
+ } else {
alu.src[0].sel = V_SQ_ALU_SRC_0;
alu.src[0].chan = i;
- }
-
- alu.src[1] = r600_src[1];
- switch (i) {
- case 0:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
- break;
- case 1:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
- break;
- case 2:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
- break;
- case 3:
alu.src[1].sel = V_SQ_ALU_SRC_0;
alu.src[1].chan = i;
}
@@ -2109,44 +1889,18 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
- alu.src[0] = r600_src[0];
- switch (i) {
- case 0:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
- break;
- case 1:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
- break;
- case 2:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
- break;
- case 3:
+ if (i < 3) {
+ r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
+ r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
+ } else {
alu.src[0].sel = V_SQ_ALU_SRC_0;
alu.src[0].chan = i;
- }
-
- alu.src[1] = r600_src[1];
- switch (i) {
- case 0:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
- break;
- case 1:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
- break;
- case 2:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
- break;
- case 3:
alu.src[1].sel = V_SQ_ALU_SRC_0;
alu.src[1].chan = i;
}
@@ -2157,11 +1911,8 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
if (use_temp)
alu.dst.sel = ctx->temp_reg;
- else {
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
- }
+ else
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
alu.dst.write = 1;
alu.is_op3 = 1;
@@ -2170,10 +1921,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
if (use_temp)
return tgsi_helper_copy(ctx, inst);
@@ -2183,7 +1930,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
static int tgsi_exp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3] = { { 0 } };
struct r600_bc_alu alu;
int r;
@@ -2192,11 +1938,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
-
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 0;
@@ -2206,10 +1948,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 0;
@@ -2221,10 +1959,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.y = tmp - floor(tmp); */
@@ -2232,11 +1966,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
- alu.src[0] = r600_src[0];
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
// r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
@@ -2250,19 +1980,13 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.z = RoughApprox2ToX(tmp);*/
if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
@@ -2273,9 +1997,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.w = 1.0;*/
@@ -2293,9 +2014,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
return tgsi_helper_copy(ctx, inst);
}
@@ -2311,11 +2029,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
-
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 0;
@@ -2325,10 +2039,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 0;
@@ -2341,10 +2051,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.y = src.x / (2 ^ floor(log2(src.x))); */
@@ -2352,11 +2058,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
-
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 1;
@@ -2367,10 +2069,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
@@ -2386,10 +2084,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
@@ -2405,10 +2099,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
@@ -2424,19 +2114,11 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
-
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.src[1].sel = ctx->temp_reg;
alu.src[1].chan = 1;
@@ -2449,10 +2131,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.z = log2(src);*/
@@ -2460,11 +2138,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
-
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
@@ -2474,10 +2148,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.w = 1.0; */
@@ -2496,10 +2166,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
return tgsi_helper_copy(ctx, inst);
@@ -2510,6 +2176,7 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int r;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
switch (inst->Instruction.Opcode) {
@@ -2524,26 +2191,26 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
return -1;
}
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.last = 1;
- alu.dst.chan = 0;
- alu.dst.sel = ctx->temp_reg;
+ alu.dst.sel = ctx->ar_reg;
alu.dst.write = 1;
- r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
+ r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
+
+ /* TODO: Note that the MOVA can be avoided if we never use AR for
+ * indexing non-CB registers in the current ALU clause. Similarly, we
+ * need to load AR from ar_reg again if we started a new clause
+ * between ARL and AR usage. The easy way to do that is to remove
+ * the MOVA here, and load it for the first AR access after ar_reg
+ * has been modified in each clause. */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].sel = ctx->ar_reg;
alu.src[0].chan = 0;
alu.last = 1;
- r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
+ r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
return 0;
@@ -2554,29 +2221,51 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int r;
- memset(&alu, 0, sizeof(struct r600_bc_alu));
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ARL:
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+ alu.dst.sel = ctx->ar_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+ return r;
+
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
+ alu.src[0].sel = ctx->ar_reg;
+ alu.dst.sel = ctx->ar_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+ return r;
break;
case TGSI_OPCODE_ARR:
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA;
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+ alu.dst.sel = ctx->ar_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+ return r;
break;
default:
assert(0);
return -1;
}
-
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
-
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
+ alu.src[0].sel = ctx->ar_reg;
alu.last = 1;
- r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
+ r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
ctx->bc->cf_last->r6xx_uses_waterfall = 1;
@@ -2593,26 +2282,18 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
if (i == 0 || i == 3) {
alu.src[0].sel = V_SQ_ALU_SRC_1;
} else {
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[0], &ctx->src[0], i);
}
- if (i == 0 || i == 2) {
+ if (i == 0 || i == 2) {
alu.src[1].sel = V_SQ_ALU_SRC_1;
} else {
- r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
- if (r)
- return r;
- alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
+ r600_bc_src(&alu.src[1], &ctx->src[1], i);
}
if (i == 3)
alu.last = 1;
@@ -2625,7 +2306,6 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx)
static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int r;
@@ -2637,10 +2317,7 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
alu.dst.write = 1;
alu.dst.chan = 0;
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.src[1].sel = V_SQ_ALU_SRC_0;
alu.src[1].chan = 0;
@@ -2654,9 +2331,25 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
static int pops(struct r600_shader_ctx *ctx, int pops)
{
- r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
- ctx->bc->cf_last->pop_count = pops;
- ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
+ int alu_pop = 3;
+ if (ctx->bc->cf_last) {
+ if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
+ alu_pop = 0;
+ else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
+ alu_pop = 1;
+ }
+ alu_pop += pops;
+ if (alu_pop == 1) {
+ ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
+ ctx->bc->force_add_cf = 1;
+ } else if (alu_pop == 2) {
+ ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
+ ctx->bc->force_add_cf = 1;
+ } else {
+ r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
+ ctx->bc->cf_last->pop_count = pops;
+ ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
+ }
return 0;
}
@@ -3002,7 +2695,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
+ {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
{TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
/* gap */
{88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
@@ -3075,7 +2768,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
{TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
{TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
{TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
- {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
{TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
{TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
{TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
@@ -3160,7 +2853,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
{TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
+ {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
{TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
/* gap */
{88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index 35b0331525..8f96ce5085 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -45,8 +45,7 @@ struct r600_shader {
struct r600_shader_io output[32];
enum radeon_family family;
boolean uses_kill;
+ boolean fs_write_all;
};
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
-
#endif
diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h
index 0573e63dc8..56ed35e8b3 100644
--- a/src/gallium/drivers/r600/r600_sq.h
+++ b/src/gallium/drivers/r600/r600_sq.h
@@ -74,6 +74,10 @@
#define S_SQ_CF_ALU_WORD0_KCACHE_MODE0(x) (((x) & 0x3) << 30)
#define G_SQ_CF_ALU_WORD0_KCACHE_MODE0(x) (((x) >> 30) & 0x3)
#define C_SQ_CF_ALU_WORD0_KCACHE_MODE0 0x3FFFFFFF
+#define V_SQ_CF_KCACHE_NOP 0x00000000
+#define V_SQ_CF_KCACHE_LOCK_1 0x00000001
+#define V_SQ_CF_KCACHE_LOCK_2 0x00000002
+#define V_SQ_CF_KCACHE_LOCK_LOOP_INDEX 0x00000003
#define P_SQ_CF_ALU_WORD1
#define S_SQ_CF_ALU_WORD1_KCACHE_MODE1(x) (((x) & 0x3) << 0)
#define G_SQ_CF_ALU_WORD1_KCACHE_MODE1(x) (((x) >> 0) & 0x3)
@@ -187,6 +191,8 @@
#define V_SQ_ALU_SRC_M_1_INT 0x000000FB
#define V_SQ_ALU_SRC_0_5 0x000000FC
#define V_SQ_ALU_SRC_LITERAL 0x000000FD
+#define V_SQ_ALU_SRC_PV 0x000000FE
+#define V_SQ_ALU_SRC_PS 0x000000FF
#define V_SQ_ALU_SRC_PARAM_BASE 0x000001C0
#define S_SQ_ALU_WORD0_SRC0_REL(x) (((x) & 0x1) << 9)
#define G_SQ_ALU_WORD0_SRC0_REL(x) (((x) >> 9) & 0x1)
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 553d786d65..d3adf0393c 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -36,8 +36,8 @@
#include <util/u_pack_color.h>
#include <util/u_memory.h>
#include <util/u_inlines.h>
-#include <util/u_upload_mgr.h>
#include <util/u_framebuffer.h>
+#include "util/u_transfer.h"
#include <pipebuffer/pb_buffer.h>
#include "r600.h"
#include "r600d.h"
@@ -95,230 +95,6 @@ void r600_polygon_offset_update(struct r600_pipe_context *rctx)
}
}
-/* FIXME optimize away spi update when it's not needed */
-static void r600_spi_update(struct r600_pipe_context *rctx)
-{
- struct r600_pipe_shader *shader = rctx->ps_shader;
- struct r600_pipe_state rstate;
- struct r600_shader *rshader = &shader->shader;
- unsigned i, tmp;
-
- rstate.nregs = 0;
- for (i = 0; i < rshader->ninput; i++) {
- tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
- if (rshader->input[i].centroid)
- tmp |= S_028644_SEL_CENTROID(1);
- if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
- tmp |= S_028644_SEL_LINEAR(1);
-
- if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
- tmp |= S_028644_FLAT_SHADE(rctx->flatshade);
- }
- if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
- rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
- tmp |= S_028644_PT_SPRITE_TEX(1);
- }
- r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
- }
- r600_context_pipe_state_set(&rctx->ctx, &rstate);
-}
-
-void r600_vertex_buffer_update(struct r600_pipe_context *rctx)
-{
- struct r600_pipe_state *rstate;
- struct r600_resource *rbuffer;
- struct pipe_vertex_buffer *vertex_buffer;
- unsigned i, offset;
-
- /* we don't update until we know vertex elements */
- if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer)
- return;
-
- /* delete previous translated vertex elements */
- if (rctx->tran.new_velems) {
- r600_end_vertex_translate(rctx);
- }
-
- if (rctx->vertex_elements->incompatible_layout) {
- /* translate rebind new vertex elements so
- * return once translated
- */
- r600_begin_vertex_translate(rctx);
- return;
- }
-
- if (rctx->any_user_vbs) {
- r600_upload_user_buffers(rctx);
- rctx->any_user_vbs = FALSE;
- }
-
- if (rctx->vertex_elements->vbuffer_need_offset) {
- /* one resource per vertex elements */
- rctx->nvs_resource = rctx->vertex_elements->count;
- } else {
- /* bind vertex buffer once */
- rctx->nvs_resource = rctx->nvertex_buffer;
- }
-
- for (i = 0 ; i < rctx->nvs_resource; i++) {
- rstate = &rctx->vs_resource[i];
- rstate->id = R600_PIPE_STATE_RESOURCE;
- rstate->nregs = 0;
-
- if (rctx->vertex_elements->vbuffer_need_offset) {
- /* one resource per vertex elements */
- unsigned vbuffer_index;
- vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index;
- vertex_buffer = &rctx->vertex_buffer[vbuffer_index];
- rbuffer = (struct r600_resource*)vertex_buffer->buffer;
- offset = rctx->vertex_elements->vbuffer_offset[i] +
- vertex_buffer->buffer_offset +
- r600_bo_offset(rbuffer->bo);
- } else {
- /* bind vertex buffer once */
- vertex_buffer = &rctx->vertex_buffer[i];
- rbuffer = (struct r600_resource*)vertex_buffer->buffer;
- offset = vertex_buffer->buffer_offset +
- r600_bo_offset(rbuffer->bo);
- }
-
- r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0,
- offset, 0xFFFFFFFF, rbuffer->bo);
- r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1,
- rbuffer->size - offset - 1, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2,
- S_038008_STRIDE(vertex_buffer->stride),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6,
- 0xC0000000, 0xFFFFFFFF, NULL);
- r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
- }
-}
-
-static void r600_draw_common(struct r600_drawl *draw)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)draw->ctx;
- struct r600_resource *rbuffer;
- unsigned prim;
- u32 vgt_dma_index_type, vgt_draw_initiator, mask;
- struct r600_draw rdraw;
- struct r600_pipe_state vgt;
-
- switch (draw->index_size) {
- case 2:
- vgt_draw_initiator = 0;
- vgt_dma_index_type = 0;
- break;
- case 4:
- vgt_draw_initiator = 0;
- vgt_dma_index_type = 1;
- break;
- case 0:
- vgt_draw_initiator = 2;
- vgt_dma_index_type = 0;
- break;
- default:
- R600_ERR("unsupported index size %d\n", draw->index_size);
- return;
- }
- if (r600_conv_pipe_prim(draw->mode, &prim))
- return;
- if (unlikely(rctx->ps_shader == NULL)) {
- R600_ERR("missing vertex shader\n");
- return;
- }
- if (unlikely(rctx->vs_shader == NULL)) {
- R600_ERR("missing vertex shader\n");
- return;
- }
- /* there should be enough input */
- if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) {
- R600_ERR("%d resources provided, expecting %d\n",
- rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource);
- return;
- }
-
- r600_spi_update(rctx);
-
- mask = 0;
- for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
- mask |= (0xF << (i * 4));
- }
-
- vgt.id = R600_PIPE_STATE_VGT;
- vgt.nregs = 0;
- r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw->index_bias, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw->max_index, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw->min_index, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL);
- r600_context_pipe_state_set(&rctx->ctx, &vgt);
-
- rdraw.vgt_num_indices = draw->count;
- rdraw.vgt_num_instances = 1;
- rdraw.vgt_index_type = vgt_dma_index_type;
- rdraw.vgt_draw_initiator = vgt_draw_initiator;
- rdraw.indices = NULL;
- if (draw->index_buffer) {
- rbuffer = (struct r600_resource*)draw->index_buffer;
- rdraw.indices = rbuffer->bo;
- rdraw.indices_bo_offset = draw->index_buffer_offset;
- }
- r600_context_draw(&rctx->ctx, &rdraw);
-}
-
-void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_drawl draw;
- boolean translate = FALSE;
-
- memset(&draw, 0, sizeof(struct r600_drawl));
- draw.ctx = ctx;
- draw.mode = info->mode;
- draw.start = info->start;
- draw.count = info->count;
- if (info->indexed && rctx->index_buffer.buffer) {
- draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size;
- draw.min_index = info->min_index;
- draw.max_index = info->max_index;
- draw.index_bias = info->index_bias;
-
- r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer,
- &rctx->index_buffer.index_size,
- &draw.start,
- info->count);
-
- draw.index_size = rctx->index_buffer.index_size;
- pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer);
- draw.index_buffer_offset = draw.start * draw.index_size;
- draw.start = 0;
- r600_upload_index_buffer(rctx, &draw);
- } else {
- draw.index_size = 0;
- draw.index_buffer = NULL;
- draw.min_index = info->min_index;
- draw.max_index = info->max_index;
- draw.index_bias = info->start;
- }
- r600_draw_common(&draw);
-
- if (translate)
- r600_end_vertex_translate(rctx);
-
- pipe_resource_reference(&draw.index_buffer, NULL);
-}
-
static void r600_set_blend_color(struct pipe_context *ctx,
const struct pipe_blend_color *state)
{
@@ -341,9 +117,10 @@ static void r600_set_blend_color(struct pipe_context *ctx,
static void *r600_create_blend_state(struct pipe_context *ctx,
const struct pipe_blend_state *state)
{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_pipe_blend *blend = CALLOC_STRUCT(r600_pipe_blend);
struct r600_pipe_state *rstate;
- u32 color_control, target_mask;
+ u32 color_control = 0, target_mask;
if (blend == NULL) {
return NULL;
@@ -353,7 +130,10 @@ static void *r600_create_blend_state(struct pipe_context *ctx,
rstate->id = R600_PIPE_STATE_BLEND;
target_mask = 0;
- color_control = S_028808_PER_MRT_BLEND(1);
+
+ /* R600 does not support per-MRT blends */
+ if (rctx->family > CHIP_R600)
+ color_control |= S_028808_PER_MRT_BLEND(1);
if (state->logicop_enable) {
color_control |= (state->logicop_func << 16) | (state->logicop_func << 20);
} else {
@@ -376,8 +156,9 @@ static void *r600_create_blend_state(struct pipe_context *ctx,
}
}
blend->cb_target_mask = target_mask;
+ /* MULTIWRITE_ENABLE is controlled by r600_pipe_shader_ps(). */
r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
- color_control, 0xFFFFFFFF, NULL);
+ color_control, 0xFFFFFFFD, NULL);
for (int i = 0; i < 8; i++) {
unsigned eqRGB = state->rt[i].rgb_func;
@@ -403,10 +184,11 @@ static void *r600_create_blend_state(struct pipe_context *ctx,
bc |= S_028804_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA));
}
- r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL);
- if (i == 0) {
+ /* R600 does not support per-MRT blends */
+ if (rctx->family > CHIP_R600)
+ r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL);
+ if (i == 0)
r600_pipe_state_add_reg(rstate, R_028804_CB_BLEND_CONTROL, bc, 0xFFFFFFFF, NULL);
- }
}
return rstate;
}
@@ -424,10 +206,6 @@ static void *r600_create_dsa_state(struct pipe_context *ctx,
rstate->id = R600_PIPE_STATE_DSA;
/* depth TODO some of those db_shader_control field depend on shader adjust mask & add it to shader */
- /* db_shader_control is 0xFFFFFFBE as Z_EXPORT_ENABLE (bit 0) will be
- * set by fragment shader if it export Z and KILL_ENABLE (bit 6) will
- * be set if shader use texkill instruction
- */
db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
stencil_ref_mask = 0;
stencil_ref_mask_bf = 0;
@@ -486,7 +264,10 @@ static void *r600_create_dsa_state(struct pipe_context *ctx,
r600_pipe_state_add_reg(rstate, R_0286E4_SPI_FOG_FUNC_BIAS, 0x00000000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBE, NULL);
+ /* The DB_SHADER_CONTROL mask is 0xFFFFFFBC since Z_EXPORT_ENABLE,
+ * STENCIL_REF_EXPORT_ENABLE and KILL_ENABLE are controlled by
+ * r600_pipe_shader_ps(). */
+ r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL);
r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_028D10_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, 0xFFFFFFFF, NULL);
@@ -582,11 +363,16 @@ static void *r600_create_sampler_state(struct pipe_context *ctx,
{
struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
union util_color uc;
+ uint32_t coord_trunc = 0;
if (rstate == NULL) {
return NULL;
}
+ if ((state->mag_img_filter == PIPE_TEX_FILTER_NEAREST) ||
+ (state->min_img_filter == PIPE_TEX_FILTER_NEAREST))
+ coord_trunc = 1;
+
rstate->id = R600_PIPE_STATE_SAMPLER;
util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0,
@@ -603,7 +389,9 @@ static void *r600_create_sampler_state(struct pipe_context *ctx,
S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) |
S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) |
S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0,
+ S_03C008_MC_COORD_TRUNCATE(coord_trunc) |
+ S_03C008_TYPE(1), 0xFFFFFFFF, NULL);
if (uc.ui) {
r600_pipe_state_add_reg(rstate, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL);
@@ -626,6 +414,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
uint32_t word4 = 0, yuv_format = 0, pitch = 0;
unsigned char swizzle[4], array_mode = 0, tile_type = 0;
struct r600_bo *bo[2];
+ unsigned height, depth;
if (resource == NULL)
return NULL;
@@ -643,7 +432,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
swizzle[1] = state->swizzle_g;
swizzle[2] = state->swizzle_b;
swizzle[3] = state->swizzle_a;
- format = r600_translate_texformat(state->format,
+ format = r600_translate_texformat(ctx->screen, state->format,
swizzle,
&word4, &yuv_format);
if (format == ~0) {
@@ -653,22 +442,30 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
if (desc == NULL) {
R600_ERR("unknow format %d\n", state->format);
}
- tmp = (struct r600_resource_texture*)texture;
+ tmp = (struct r600_resource_texture *)texture;
+ if (tmp->depth && !tmp->is_flushing_texture) {
+ r600_texture_depth_flush(ctx, texture, TRUE);
+ tmp = tmp->flushed_depth_texture;
+ }
+
+ if (tmp->force_int_type) {
+ word4 &= C_038010_NUM_FORMAT_ALL;
+ word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT);
+ }
rbuffer = &tmp->resource;
bo[0] = rbuffer->bo;
bo[1] = rbuffer->bo;
- /* FIXME depth texture decompression */
- if (tmp->depth) {
- r600_texture_depth_flush(ctx, texture);
- tmp = (struct r600_resource_texture*)texture;
- rbuffer = &tmp->flushed_depth_texture->resource;
- bo[0] = rbuffer->bo;
- bo[1] = rbuffer->bo;
- }
- pitch = align(tmp->pitch_in_pixels[0], 8);
- if (tmp->tiled) {
- array_mode = tmp->array_mode[0];
- tile_type = tmp->tile_type;
+ pitch = align(tmp->pitch_in_blocks[0] * util_format_get_blockwidth(state->format), 8);
+ array_mode = tmp->array_mode[0];
+ tile_type = tmp->tile_type;
+
+ height = texture->height0;
+ depth = texture->depth0;
+ if (texture->target == PIPE_TEXTURE_1D_ARRAY) {
+ height = 1;
+ depth = texture->array_size;
+ } else if (texture->target == PIPE_TEXTURE_2D_ARRAY) {
+ depth = texture->array_size;
}
/* FIXME properly handle first level != 0 */
@@ -679,22 +476,22 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
S_038000_PITCH((pitch / 8) - 1) |
S_038000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1,
- S_038004_TEX_HEIGHT(texture->height0 - 1) |
- S_038004_TEX_DEPTH(texture->depth0 - 1) |
+ S_038004_TEX_HEIGHT(height - 1) |
+ S_038004_TEX_DEPTH(depth - 1) |
S_038004_DATA_FORMAT(format), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2,
(tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3,
(tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]);
r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4,
- word4 | S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) |
- S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) |
+ word4 |
+ S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_NO_ZERO) |
S_038010_REQUEST_SIZE(1) |
S_038010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5,
S_038014_LAST_LEVEL(state->u.tex.last_level) |
- S_038014_BASE_ARRAY(0) |
- S_038014_LAST_ARRAY(0), 0xFFFFFFFF, NULL);
+ S_038014_BASE_ARRAY(state->u.tex.first_layer) |
+ S_038014_LAST_ARRAY(state->u.tex.last_layer), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6,
S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE), 0xFFFFFFFF, NULL);
@@ -709,7 +506,8 @@ static void r600_set_vs_sampler_view(struct pipe_context *ctx, unsigned count,
for (int i = 0; i < count; i++) {
if (resource[i]) {
- r600_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i);
+ r600_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state,
+ i + R600_MAX_CONST_BUFFERS);
}
}
}
@@ -724,9 +522,11 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count,
for (i = 0; i < count; i++) {
if (&rctx->ps_samplers.views[i]->base != views[i]) {
if (resource[i])
- r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i);
+ r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state,
+ i + R600_MAX_CONST_BUFFERS);
else
- r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+ r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL,
+ i + R600_MAX_CONST_BUFFERS);
pipe_sampler_view_reference(
(struct pipe_sampler_view **)&rctx->ps_samplers.views[i],
@@ -736,7 +536,8 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count,
}
for (i = count; i < NUM_TEX_UNITS; i++) {
if (rctx->ps_samplers.views[i]) {
- r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+ r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL,
+ i + R600_MAX_CONST_BUFFERS);
pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL);
}
}
@@ -918,33 +719,55 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
unsigned offset;
const struct util_format_description *desc;
struct r600_bo *bo[3];
+ int i;
surf = (struct r600_surface *)state->cbufs[cb];
rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
+
+ if (rtex->depth && !rtex->is_flushing_texture) {
+ r600_texture_depth_flush(&rctx->context, state->cbufs[cb]->texture, TRUE);
+ rtex = rtex->flushed_depth_texture;
+ }
+
rbuffer = &rtex->resource;
bo[0] = rbuffer->bo;
bo[1] = rbuffer->bo;
bo[2] = rbuffer->bo;
/* XXX quite sure for dx10+ hw don't need any offset hacks */
- offset = r600_texture_get_offset((struct r600_resource_texture *)state->cbufs[cb]->texture,
+ offset = r600_texture_get_offset(rtex,
level, state->cbufs[cb]->u.tex.first_layer);
- pitch = rtex->pitch_in_pixels[level] / 8 - 1;
- slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
+ pitch = rtex->pitch_in_blocks[level] / 8 - 1;
+ slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1;
ntype = 0;
- desc = util_format_description(rtex->resource.base.b.format);
+ desc = util_format_description(surf->base.format);
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
ntype = V_0280A0_NUMBER_SRGB;
- format = r600_translate_colorformat(rtex->resource.base.b.format);
- swap = r600_translate_colorswap(rtex->resource.base.b.format);
+ for (i = 0; i < 4; i++) {
+ if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+ break;
+ }
+ }
+
+ format = r600_translate_colorformat(surf->base.format);
+ swap = r600_translate_colorswap(surf->base.format);
+
+ /* disable when gallium grows int textures */
+ if ((format == FMT_32_32_32_32 || format == FMT_16_16_16_16) && rtex->force_int_type)
+ ntype = 4;
+
color_info = S_0280A0_FORMAT(format) |
S_0280A0_COMP_SWAP(swap) |
S_0280A0_ARRAY_MODE(rtex->array_mode[level]) |
S_0280A0_BLEND_CLAMP(1) |
S_0280A0_NUMBER_TYPE(ntype);
- if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
- color_info |= S_0280A0_SOURCE_FORMAT(1);
+
+ /* on R600 SOURCE_FORMAT can't be set if BLEND_CLAMP isn't set,
+ if BLEND_FLOAT32 is set, or with > 11 bits in a UNORM or SNORM */
+ if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS &&
+ desc->channel[i].size < 12)
+ color_info |= S_0280A0_SOURCE_FORMAT(V_0280A0_EXPORT_NORM);
r600_pipe_state_add_reg(rstate,
R_028040_CB_COLOR0_BASE + cb * 4,
@@ -988,17 +811,14 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
surf = (struct r600_surface *)state->zsbuf;
rtex = (struct r600_resource_texture*)state->zsbuf->texture;
- rtex->tiled = 1;
- rtex->array_mode[level] = 2;
- rtex->tile_type = 1;
- rtex->depth = 1;
+
rbuffer = &rtex->resource;
/* XXX quite sure for dx10+ hw don't need any offset hacks */
offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture,
level, state->zsbuf->u.tex.first_layer);
- pitch = rtex->pitch_in_pixels[level] / 8 - 1;
- slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
+ pitch = rtex->pitch_in_blocks[level] / 8 - 1;
+ slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1;
format = r600_translate_dbformat(state->zsbuf->texture->format);
r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE,
@@ -1029,8 +849,6 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
util_copy_framebuffer_state(&rctx->framebuffer, state);
- rctx->pframebuffer = &rctx->framebuffer;
-
/* build states */
for (int i = 0; i < state->nr_cbufs; i++) {
r600_cb(rctx, rstate, state, i);
@@ -1116,48 +934,6 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
}
}
-static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
- struct pipe_resource *buffer)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_resource *rbuffer = (struct r600_resource*)buffer;
-
- /* Note that the state tracker can unbind constant buffers by
- * passing NULL here.
- */
- if (buffer == NULL) {
- return;
- }
-
- switch (shader) {
- case PIPE_SHADER_VERTEX:
- rctx->vs_const_buffer.nregs = 0;
- r600_pipe_state_add_reg(&rctx->vs_const_buffer,
- R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
- ALIGN_DIVUP(buffer->width0 >> 4, 16),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&rctx->vs_const_buffer,
- R_028980_ALU_CONST_CACHE_VS_0,
- r600_bo_offset(rbuffer->bo) >> 8, 0xFFFFFFFF, rbuffer->bo);
- r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);
- break;
- case PIPE_SHADER_FRAGMENT:
- rctx->ps_const_buffer.nregs = 0;
- r600_pipe_state_add_reg(&rctx->ps_const_buffer,
- R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
- ALIGN_DIVUP(buffer->width0 >> 4, 16),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&rctx->ps_const_buffer,
- R_028940_ALU_CONST_CACHE_PS_0,
- r600_bo_offset(rbuffer->bo) >> 8, 0xFFFFFFFF, rbuffer->bo);
- r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);
- break;
- default:
- R600_ERR("unsupported %d\n", shader);
- return;
- }
-}
-
void r600_init_state_functions(struct r600_pipe_context *rctx)
{
rctx->context.create_blend_state = r600_create_blend_state;
@@ -1197,6 +973,7 @@ void r600_init_state_functions(struct r600_pipe_context *rctx)
rctx->context.set_vertex_sampler_views = r600_set_vs_sampler_view;
rctx->context.set_viewport_state = r600_set_viewport_state;
rctx->context.sampler_view_destroy = r600_sampler_view_destroy;
+ rctx->context.redefine_user_buffer = u_default_redefine_user_buffer;
}
void r600_init_config(struct r600_pipe_context *rctx)
@@ -1450,6 +1227,163 @@ void r600_init_config(struct r600_pipe_context *rctx)
r600_context_pipe_state_set(&rctx->ctx, rstate);
}
+void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
+{
+ struct r600_pipe_state *rstate = &shader->rstate;
+ struct r600_shader *rshader = &shader->shader;
+ unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control;
+ int pos_index = -1, face_index = -1;
+ /* Fill shader->rstate with the pixel-shader registers derived from rshader. */
+ rstate->nregs = 0;
+ /* Find the POSITION and FACE inputs; -1 means the shader has no such input. */
+ for (i = 0; i < rshader->ninput; i++) {
+ if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
+ pos_index = i;
+ if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
+ face_index = i;
+ }
+ /* Shader-controlled DB_SHADER_CONTROL bits: Z export, stencil-ref export and kill. */
+ db_shader_control = 0;
+ for (i = 0; i < rshader->noutput; i++) {
+ if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
+ db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1);
+ if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
+ db_shader_control |= S_02880C_STENCIL_REF_EXPORT_ENABLE(1);
+ }
+ if (rshader->uses_kill)
+ db_shader_control |= S_02880C_KILL_ENABLE(1);
+ /* Bit 0 flags a POSITION or STENCIL output; COLOR outputs are counted for EXPORT_COLORS. */
+ exports_ps = 0;
+ num_cout = 0;
+ for (i = 0; i < rshader->noutput; i++) {
+ if (rshader->output[i].name == TGSI_SEMANTIC_POSITION ||
+ rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
+ exports_ps |= 1;
+ else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
+ num_cout++;
+ }
+ }
+ exports_ps |= S_028854_EXPORT_COLORS(num_cout);
+ if (!exports_ps) {
+ /* always at least export 1 component per pixel */
+ exports_ps = 2;
+ }
+
+ spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
+ S_0286CC_PERSP_GRADIENT_ENA(1);
+ spi_input_z = 0;
+ if (pos_index != -1) {
+ spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
+ S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
+ S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
+ S_0286CC_BARYC_SAMPLE_CNTL(1));
+ spi_input_z |= 1;
+ }
+
+ spi_ps_in_control_1 = 0;
+ if (face_index != -1) {
+ spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
+ S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
+ }
+
+ r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028840_SQ_PGM_START_PS,
+ r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
+ r600_pipe_state_add_reg(rstate,
+ R_028850_SQ_PGM_RESOURCES_PS,
+ S_028868_NUM_GPRS(rshader->bc.ngpr) |
+ S_028868_STACK_SIZE(rshader->bc.nstack),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028854_SQ_PGM_EXPORTS_PS,
+ exports_ps, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_0288CC_SQ_PGM_CF_OFFSET_PS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
+ S_028808_MULTIWRITE_ENABLE(!!rshader->fs_write_all),
+ S_028808_MULTIWRITE_ENABLE(1), /* mask: only the MULTIWRITE_ENABLE bit is written here */
+ NULL);
+ /* only set some bits here, the other bits are set in the dsa state */
+ r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL,
+ db_shader_control,
+ S_02880C_Z_EXPORT_ENABLE(1) |
+ S_02880C_STENCIL_REF_EXPORT_ENABLE(1) |
+ S_02880C_KILL_ENABLE(1),
+ NULL);
+ /* NOTE(review): the meaning of the 0x01000FFF loop constant is not visible here — confirm. */
+ r600_pipe_state_add_reg(rstate,
+ R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
+ 0xFFFFFFFF, NULL);
+}
+
+void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
+{
+ struct r600_pipe_state *rstate = &shader->rstate;
+ struct r600_shader *rshader = &shader->shader;
+ unsigned spi_vs_out_id[10];
+ unsigned i, tmp;
+ /* Fill shader->rstate with the vertex-shader registers: SPI_VS_OUT_ID table, export count, SQ program setup. */
+ /* clear previously queued registers */
+ rstate->nregs = 0;
+
+ /* no proper semantic ids come from tgsi so far, so slot i simply gets id i */
+ /* FIXME better to move this into the config state so it is emitted
+ * only once per cs
+ */
+ for (i = 0; i < 10; i++) {
+ spi_vs_out_id[i] = 0;
+ }
+ for (i = 0; i < 32; i++) {
+ tmp = i << ((i & 3) * 8); /* id i goes into byte (i & 3) of word i / 4 */
+ spi_vs_out_id[i / 4] |= tmp;
+ }
+ for (i = 0; i < 10; i++) {
+ r600_pipe_state_add_reg(rstate,
+ R_028614_SPI_VS_OUT_ID_0 + i * 4,
+ spi_vs_out_id[i], 0xFFFFFFFF, NULL);
+ }
+
+ r600_pipe_state_add_reg(rstate,
+ R_0286C4_SPI_VS_OUT_CONFIG,
+ S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), /* assumes noutput >= 2 — TODO confirm */
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028868_SQ_PGM_RESOURCES_VS,
+ S_028868_NUM_GPRS(rshader->bc.ngpr) |
+ S_028868_STACK_SIZE(rshader->bc.nstack),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_0288D0_SQ_PGM_CF_OFFSET_VS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate,
+ R_028858_SQ_PGM_START_VS,
+ r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
+ /* NOTE(review): written 32 dwords past SQ_LOOP_CONST_0 — presumably the VS loop-constant slot; confirm. */
+ r600_pipe_state_add_reg(rstate,
+ R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
+ 0xFFFFFFFF, NULL);
+}
+
+void r600_fetch_shader(struct r600_vertex_element *ve)
+{
+ struct r600_pipe_state *rstate;
+ /* Rebuild ve->rstate as the fetch-shader state; the program start is taken from ve->fetch_shader. */
+ rstate = &ve->rstate;
+ rstate->id = R600_PIPE_STATE_FETCH_SHADER;
+ rstate->nregs = 0;
+ r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS,
+ r600_bo_offset(ve->fetch_shader) >> 8,
+ 0xFFFFFFFF, ve->fetch_shader);
+}
+
void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx)
{
struct pipe_depth_stencil_alpha_state dsa;
@@ -1487,3 +1421,25 @@ void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx)
S_028D0C_COPY_CENTROID(1), NULL);
return rstate;
}
+/* Append the seven RESOURCE0 words for a buffer (offset, remaining size - 1, stride) to rstate. */
+void r600_pipe_set_buffer_resource(struct r600_pipe_context *rctx,
+ struct r600_pipe_state *rstate,
+ struct r600_resource *rbuffer,
+ unsigned offset, unsigned stride)
+{
+ r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0,
+ offset, 0xFFFFFFFF, rbuffer->bo);
+ r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1,
+ rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2,
+ S_038008_STRIDE(stride),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6,
+ 0xC0000000, 0xFFFFFFFF, NULL); /* NOTE(review): 0xC0000000 is unexplained here — confirm against the resource word 6 layout */
+}
+}
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index c647e77b37..43dad0c802 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -27,7 +27,9 @@
#include <util/u_memory.h>
#include <util/u_format.h>
#include <pipebuffer/pb_buffer.h>
+#include "pipe/p_shader_tokens.h"
#include "r600_pipe.h"
+#include "r600d.h"
/* common state between evergreen and r600 */
void r600_bind_blend_state(struct pipe_context *ctx, void *state)
@@ -121,17 +123,11 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)
rctx->vertex_elements = v;
if (v) {
+ u_vbuf_mgr_bind_vertex_elements(rctx->vbuf_mgr, state,
+ v->vmgr_elements);
+
rctx->states[v->rstate.id] = &v->rstate;
r600_context_pipe_state_set(&rctx->ctx, &v->rstate);
- if (rctx->family >= CHIP_CEDAR) {
- evergreen_vertex_buffer_update(rctx);
- } else {
- r600_vertex_buffer_update(rctx);
- }
- }
-
- if (v) {
-// rctx->vs_rebuild = TRUE;
}
}
@@ -147,6 +143,7 @@ void r600_delete_vertex_element(struct pipe_context *ctx, void *state)
rctx->vertex_elements = NULL;
r600_bo_reference(rctx->radeon, &v->fetch_shader, NULL);
+ u_vbuf_mgr_destroy_vertex_elements(rctx->vbuf_mgr, v->vmgr_elements);
FREE(state);
}
@@ -171,42 +168,28 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
const struct pipe_vertex_buffer *buffers)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct pipe_vertex_buffer *vbo;
- unsigned max_index = (unsigned)-1;
-
- for (int i = 0; i < rctx->nvertex_buffer; i++) {
- pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL);
- }
- memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count);
-
- for (int i = 0; i < count; i++) {
- vbo = (struct pipe_vertex_buffer*)&buffers[i];
-
- rctx->vertex_buffer[i].buffer = NULL;
- if (r600_buffer_is_user_buffer(buffers[i].buffer))
- rctx->any_user_vbs = TRUE;
- pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer);
+ int i;
- if (vbo->max_index == ~0) {
- if (!vbo->stride)
- vbo->max_index = 1;
- else
- vbo->max_index = (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride;
+ /* Zero states. */
+ for (i = 0; i < count; i++) {
+ if (!buffers[i].buffer) {
+ if (rctx->family >= CHIP_CEDAR) {
+ evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i);
+ } else {
+ r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i);
+ }
}
- max_index = MIN2(vbo->max_index, max_index);
}
- rctx->nvertex_buffer = count;
- rctx->vb_max_index = max_index;
- if (rctx->family >= CHIP_CEDAR) {
- evergreen_vertex_buffer_update(rctx);
- } else {
- r600_vertex_buffer_update(rctx);
+ for (; i < rctx->vbuf_mgr->nr_real_vertex_buffers; i++) {
+ if (rctx->family >= CHIP_CEDAR) {
+ evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i);
+ } else {
+ r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i);
+ }
}
-}
-
-#define FORMAT_REPLACE(what, withwhat) \
- case PIPE_FORMAT_##what: *format = PIPE_FORMAT_##withwhat; break
+ u_vbuf_mgr_set_vertex_buffers(rctx->vbuf_mgr, count, buffers);
+}
void *r600_create_vertex_elements(struct pipe_context *ctx,
unsigned count,
@@ -214,33 +197,15 @@ void *r600_create_vertex_elements(struct pipe_context *ctx,
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element);
- enum pipe_format *format;
- int i;
assert(count < 32);
if (!v)
return NULL;
v->count = count;
- memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element));
-
- for (i = 0; i < count; i++) {
- v->hw_format[i] = v->elements[i].src_format;
- format = &v->hw_format[i];
-
- switch (*format) {
- FORMAT_REPLACE(R64_FLOAT, R32_FLOAT);
- FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT);
- FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT);
- FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT);
- default:;
- }
- v->incompatible_layout =
- v->incompatible_layout ||
- v->elements[i].src_format != v->hw_format[i];
-
- v->hw_format_size[i] = align(util_format_get_blocksize(v->hw_format[i]), 4);
- }
+ v->vmgr_elements =
+ u_vbuf_mgr_create_vertex_elements(rctx->vbuf_mgr, count,
+ elements, v->elements);
if (r600_vertex_elements_build_fetch_shader(rctx, v)) {
FREE(v);
@@ -310,3 +275,274 @@ void r600_delete_vs_shader(struct pipe_context *ctx, void *state)
r600_pipe_shader_destroy(ctx, shader);
free(shader);
}
+
+/* FIXME optimize away spi update when it's not needed */
+/*
+ * Program one SPI_PS_INPUT_CNTL register per input of the bound pixel
+ * shader.  Each register carries the semantic index that
+ * r600_find_vs_semantic_index() reports for that input, plus the
+ * flat-shade, point-sprite and (on families below CHIP_CEDAR only) the
+ * centroid/linear selection bits.  The collected registers are submitted
+ * with a single r600_context_pipe_state_set() call.
+ */
+void r600_spi_update(struct r600_pipe_context *rctx)
+{
+	struct r600_shader *ps = &rctx->ps_shader->shader;
+	struct r600_pipe_state rstate;
+	unsigned idx;
+
+	rstate.nregs = 0;
+	for (idx = 0; idx < ps->ninput; idx++) {
+		unsigned cntl;
+
+		cntl = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, ps, idx));
+
+		switch (ps->input[idx].name) {
+		case TGSI_SEMANTIC_COLOR:
+		case TGSI_SEMANTIC_BCOLOR:
+		case TGSI_SEMANTIC_POSITION:
+			/* These inputs follow the context's flat-shade setting. */
+			cntl |= S_028644_FLAT_SHADE(rctx->flatshade);
+			break;
+		case TGSI_SEMANTIC_GENERIC:
+			/* Generic inputs become sprite coordinates when their
+			 * sid bit is set in sprite_coord_enable. */
+			if (rctx->sprite_coord_enable & (1 << ps->input[idx].sid))
+				cntl |= S_028644_PT_SPRITE_TEX(1);
+			break;
+		default:
+			break;
+		}
+
+		if (rctx->family < CHIP_CEDAR) {
+			if (ps->input[idx].centroid)
+				cntl |= S_028644_SEL_CENTROID(1);
+
+			if (ps->input[idx].interpolate == TGSI_INTERPOLATE_LINEAR)
+				cntl |= S_028644_SEL_LINEAR(1);
+		}
+
+		/* The per-input registers are laid out 4 bytes apart. */
+		r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + idx * 4, cntl, 0xFFFFFFFF, NULL);
+	}
+	r600_context_pipe_state_set(&rctx->ctx, &rstate);
+}
+
+/*
+ * Bind a constant buffer to the vertex or fragment shader stage.
+ *
+ * ctx:    pipe context; cast to struct r600_pipe_context internally.
+ * shader: PIPE_SHADER_VERTEX or PIPE_SHADER_FRAGMENT.  Any other value is
+ *         reported through R600_ERR and no register state is programmed.
+ * index:  slot used in the per-stage *_const_buffer_resource array.
+ * buffer: buffer to bind.  NULL is accepted and ignored.
+ *
+ * For the selected stage this programs the ALU constant buffer size and
+ * cache registers and sets up the matching buffer resource, using the
+ * evergreen_* helpers for CHIP_CEDAR and newer families and the r600_*
+ * helpers otherwise.
+ */
+void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
+				struct pipe_resource *buffer)
+{
+	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+	struct r600_resource_buffer *rbuffer = r600_buffer(buffer);
+	struct r600_pipe_state *rstate;
+	uint32_t offset;
+
+	/* Note that the state tracker can unbind constant buffers by
+	 * passing NULL here.
+	 */
+	if (buffer == NULL) {
+		return;
+	}
+
+	/* rbuffer is passed by address, so it may point at a different
+	 * buffer once this returns (see the release at the end). */
+	r600_upload_const_buffer(rctx, &rbuffer, &offset);
+	offset += r600_bo_offset(rbuffer->r.bo);
+
+	switch (shader) {
+	case PIPE_SHADER_VERTEX:
+		rctx->vs_const_buffer.nregs = 0;
+		r600_pipe_state_add_reg(&rctx->vs_const_buffer,
+					R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
+					ALIGN_DIVUP(buffer->width0 >> 4, 16),
+					0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(&rctx->vs_const_buffer,
+					R_028980_ALU_CONST_CACHE_VS_0,
+					offset >> 8, 0xFFFFFFFF, rbuffer->r.bo);
+		r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);
+
+		rstate = &rctx->vs_const_buffer_resource[index];
+		rstate->id = R600_PIPE_STATE_RESOURCE;
+		rstate->nregs = 0;
+		if (rctx->family >= CHIP_CEDAR) {
+			evergreen_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+			evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index);
+		} else {
+			r600_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+			r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index);
+		}
+		break;
+	case PIPE_SHADER_FRAGMENT:
+		rctx->ps_const_buffer.nregs = 0;
+		r600_pipe_state_add_reg(&rctx->ps_const_buffer,
+					R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
+					ALIGN_DIVUP(buffer->width0 >> 4, 16),
+					0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(&rctx->ps_const_buffer,
+					R_028940_ALU_CONST_CACHE_PS_0,
+					offset >> 8, 0xFFFFFFFF, rbuffer->r.bo);
+		r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);
+
+		rstate = &rctx->ps_const_buffer_resource[index];
+		rstate->id = R600_PIPE_STATE_RESOURCE;
+		rstate->nregs = 0;
+		if (rctx->family >= CHIP_CEDAR) {
+			evergreen_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+			evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index);
+		} else {
+			r600_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+			r600_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index);
+		}
+		break;
+	default:
+		R600_ERR("unsupported %d\n", shader);
+		/* Leave the switch instead of returning so the release
+		 * below still runs on this error path. */
+		break;
+	}
+
+	/* If rbuffer no longer refers to the caller's buffer, drop the
+	 * reference held on the replacement. */
+	if (buffer != &rbuffer->r.b.b.b)
+		pipe_resource_reference((struct pipe_resource**)&rbuffer, NULL);
+}
+
+/*
+ * Rebuild the fetch-shader buffer resources from the vertex buffers held
+ * by the vbuf manager.
+ *
+ * When the bound vertex elements set vbuffer_need_offset, one resource is
+ * emitted per vertex element, using that element's vertex buffer and its
+ * vbuffer_offset entry.  Otherwise one resource is emitted per real
+ * vertex buffer with no extra offset.  Slots whose real buffer is NULL
+ * are skipped after their state id/nregs have been reset.
+ */
+static void r600_vertex_buffer_update(struct r600_pipe_context *rctx)
+{
+	unsigned slot, nslots;
+
+	/* one resource per vertex elements, or bind vertex buffer once */
+	if (!rctx->vertex_elements->vbuffer_need_offset) {
+		nslots = rctx->vbuf_mgr->nr_real_vertex_buffers;
+	} else {
+		nslots = rctx->vertex_elements->count;
+	}
+
+	for (slot = 0; slot < nslots; slot++) {
+		struct r600_pipe_state *rstate = &rctx->fs_resource[slot];
+		struct pipe_vertex_buffer *vb;
+		struct r600_resource *res;
+		unsigned vb_index = slot;
+		unsigned offset = 0;
+
+		rstate->id = R600_PIPE_STATE_RESOURCE;
+		rstate->nregs = 0;
+
+		if (rctx->vertex_elements->vbuffer_need_offset) {
+			/* Per-element binding: look the buffer up through
+			 * the element and start from its own offset. */
+			vb_index = rctx->vertex_elements->elements[slot].vertex_buffer_index;
+			offset = rctx->vertex_elements->vbuffer_offset[slot];
+		}
+
+		vb = &rctx->vbuf_mgr->vertex_buffer[vb_index];
+		res = (struct r600_resource*)rctx->vbuf_mgr->real_vertex_buffer[vb_index];
+		if (vb == NULL || res == NULL)
+			continue;
+
+		offset += vb->buffer_offset + r600_bo_offset(res->bo);
+
+		if (rctx->family >= CHIP_CEDAR) {
+			evergreen_pipe_set_buffer_resource(rctx, rstate, res, offset, vb->stride);
+			evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, slot);
+		} else {
+			r600_pipe_set_buffer_resource(rctx, rstate, res, offset, vb->stride);
+			r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, slot);
+		}
+	}
+}
+
+/*
+ * Draw entry point.
+ *
+ * Flushes depth textures, lets the vbuf manager prepare the vertex
+ * buffers, rebuilds the fetch-shader resources, resolves the index
+ * buffer (translating and uploading it when needed), programs the VGT
+ * state and finally submits the draw through the evergreen or r600
+ * backend depending on the chip family.
+ *
+ * ctx:  pipe context; cast to struct r600_pipe_context internally.
+ * info: draw parameters; copied into a local so it can be adjusted.
+ *
+ * Every exit after u_vbuf_mgr_draw_begin() goes through the "out" label
+ * so the index buffer reference taken here is released and
+ * u_vbuf_mgr_draw_end() is called on error paths as well as on success.
+ */
+void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
+{
+	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+	struct r600_resource *rbuffer;
+	u32 vgt_dma_index_type, vgt_draw_initiator, mask;
+	struct r600_draw rdraw;
+	struct r600_pipe_state vgt;
+	struct r600_drawl draw = {};
+	unsigned prim;
+
+	r600_flush_depth_textures(rctx);
+	u_vbuf_mgr_draw_begin(rctx->vbuf_mgr, info, NULL, NULL);
+	r600_vertex_buffer_update(rctx);
+
+	draw.info = *info;
+	draw.ctx = ctx;
+	if (info->indexed && rctx->index_buffer.buffer) {
+		draw.info.start += rctx->index_buffer.offset / rctx->index_buffer.index_size;
+		pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer);
+
+		r600_translate_index_buffer(rctx, &draw.index_buffer,
+					    &rctx->index_buffer.index_size,
+					    &draw.info.start,
+					    info->count);
+
+		/* The start index is folded into a byte offset into the
+		 * index buffer, so the draw itself starts at 0. */
+		draw.index_size = rctx->index_buffer.index_size;
+		draw.index_buffer_offset = draw.info.start * draw.index_size;
+		draw.info.start = 0;
+
+		if (u_vbuf_resource(draw.index_buffer)->user_ptr) {
+			r600_upload_index_buffer(rctx, &draw);
+		}
+	} else {
+		draw.info.index_bias = info->start;
+	}
+
+	switch (draw.index_size) {
+	case 2:
+		vgt_draw_initiator = 0;
+		vgt_dma_index_type = 0;
+		break;
+	case 4:
+		vgt_draw_initiator = 0;
+		vgt_dma_index_type = 1;
+		break;
+	case 0:
+		/* Non-indexed draw. */
+		vgt_draw_initiator = 2;
+		vgt_dma_index_type = 0;
+		break;
+	default:
+		R600_ERR("unsupported index size %d\n", draw.index_size);
+		goto out;
+	}
+	if (r600_conv_pipe_prim(draw.info.mode, &prim))
+		goto out;
+	if (unlikely(rctx->ps_shader == NULL)) {
+		R600_ERR("missing pixel shader\n");
+		goto out;
+	}
+	if (unlikely(rctx->vs_shader == NULL)) {
+		R600_ERR("missing vertex shader\n");
+		goto out;
+	}
+	/* there should be enough input */
+	if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) {
+		R600_ERR("%d resources provided, expecting %d\n",
+			rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource);
+		goto out;
+	}
+
+	r600_spi_update(rctx);
+
+	/* Four mask bits per bound colour buffer.  The constant is unsigned
+	 * so the shift cannot overflow a signed int once i reaches 7. */
+	mask = 0;
+	for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
+		mask |= (0xFu << (i * 4));
+	}
+
+	vgt.id = R600_PIPE_STATE_VGT;
+	vgt.nregs = 0;
+	r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw.info.index_bias, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw.info.max_index, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.info.min_index, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, draw.info.start_instance, 0xFFFFFFFF, NULL);
+	r600_context_pipe_state_set(&rctx->ctx, &vgt);
+
+	rdraw.vgt_num_indices = draw.info.count;
+	rdraw.vgt_num_instances = draw.info.instance_count;
+	rdraw.vgt_index_type = vgt_dma_index_type;
+	rdraw.vgt_draw_initiator = vgt_draw_initiator;
+	rdraw.indices = NULL;
+	if (draw.index_buffer) {
+		rbuffer = (struct r600_resource*)draw.index_buffer;
+		rdraw.indices = rbuffer->bo;
+		rdraw.indices_bo_offset = draw.index_buffer_offset;
+	}
+
+	if (rctx->family >= CHIP_CEDAR) {
+		evergreen_context_draw(&rctx->ctx, &rdraw);
+	} else {
+		r600_context_draw(&rctx->ctx, &rdraw);
+	}
+
+	/* Only a submitted draw marks the depth/stencil texture dirty. */
+	if (rctx->framebuffer.zsbuf)
+	{
+		struct pipe_resource *tex = rctx->framebuffer.zsbuf->texture;
+		((struct r600_resource_texture *)tex)->dirty_db = TRUE;
+	}
+
+out:
+	/* draw.index_buffer is NULL for non-indexed draws (draw is
+	 * zero-initialised), so this release is safe on every path. */
+	pipe_resource_reference(&draw.index_buffer, NULL);
+
+	u_vbuf_mgr_draw_end(rctx->vbuf_mgr);
+}
diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h
index d994196e19..3d0360485a 100644
--- a/src/gallium/drivers/r600/r600_state_inlines.h
+++ b/src/gallium/drivers/r600/r600_state_inlines.h
@@ -253,9 +253,13 @@ static inline unsigned r600_tex_dim(unsigned dim)
default:
case PIPE_TEXTURE_1D:
return V_038000_SQ_TEX_DIM_1D;
+ case PIPE_TEXTURE_1D_ARRAY:
+ return V_038000_SQ_TEX_DIM_1D_ARRAY;
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_RECT:
return V_038000_SQ_TEX_DIM_2D;
+ case PIPE_TEXTURE_2D_ARRAY:
+ return V_038000_SQ_TEX_DIM_2D_ARRAY;
case PIPE_TEXTURE_3D:
return V_038000_SQ_TEX_DIM_3D;
case PIPE_TEXTURE_CUBE:
@@ -285,10 +289,14 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
return V_0280A0_SWAP_ALT_REV;
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_L8_SRGB:
case PIPE_FORMAT_R8_UNORM:
case PIPE_FORMAT_R8_SNORM:
return V_0280A0_SWAP_STD;
+ case PIPE_FORMAT_L4A4_UNORM:
+ return V_0280A0_SWAP_ALT;
+
/* 16-bit buffers. */
case PIPE_FORMAT_B5G6R5_UNORM:
return V_0280A0_SWAP_STD_REV;
@@ -305,6 +313,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
return V_0280A0_SWAP_STD;
case PIPE_FORMAT_L8A8_UNORM:
+ case PIPE_FORMAT_L8A8_SRGB:
return V_0280A0_SWAP_ALT;
case PIPE_FORMAT_R8G8_UNORM:
return V_0280A0_SWAP_STD;
@@ -327,6 +336,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
case PIPE_FORMAT_X8R8G8B8_UNORM:
return V_0280A0_SWAP_ALT_REV;
case PIPE_FORMAT_R8G8B8A8_SNORM:
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
case PIPE_FORMAT_R8G8B8X8_UNORM:
return V_0280A0_SWAP_STD;
@@ -345,9 +355,11 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
case PIPE_FORMAT_R10G10B10A2_UNORM:
case PIPE_FORMAT_R10G10B10X2_SNORM:
- case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
- return V_0280A0_SWAP_STD_REV;
+ return V_0280A0_SWAP_STD;
+
+ case PIPE_FORMAT_B10G10R10A2_UNORM:
+ return V_0280A0_SWAP_ALT;
case PIPE_FORMAT_R16G16_UNORM:
return V_0280A0_SWAP_STD;
@@ -355,14 +367,13 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
/* 64-bit buffers. */
case PIPE_FORMAT_R16G16B16A16_UNORM:
case PIPE_FORMAT_R16G16B16A16_SNORM:
- // return FMT_16_16_16_16;
case PIPE_FORMAT_R16G16B16A16_FLOAT:
- // return FMT_16_16_16_16_FLOAT;
/* 128-bit buffers. */
case PIPE_FORMAT_R32G32B32A32_FLOAT:
- // return FMT_32_32_32_32_FLOAT;
- return 0;
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ return V_0280A0_SWAP_STD;
default:
R600_ERR("unsupported colorswap format %d\n", format);
return ~0;
@@ -373,10 +384,14 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
{
switch (format) {
+ case PIPE_FORMAT_L4A4_UNORM:
+ return V_0280A0_COLOR_4_4;
+
/* 8-bit buffers. */
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_L8_SRGB:
case PIPE_FORMAT_R8_UNORM:
case PIPE_FORMAT_R8_SNORM:
return V_0280A0_COLOR_8;
@@ -397,6 +412,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
return V_0280A0_COLOR_16;
case PIPE_FORMAT_L8A8_UNORM:
+ case PIPE_FORMAT_L8A8_SRGB:
case PIPE_FORMAT_R8G8_UNORM:
return V_0280A0_COLOR_8_8;
@@ -423,7 +439,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
case PIPE_FORMAT_R10G10B10X2_SNORM:
case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
- return V_0280A0_COLOR_10_10_10_2;
+ return V_0280A0_COLOR_2_10_10_10;
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
@@ -469,6 +485,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
return V_0280A0_COLOR_32_32_32_FLOAT;
case PIPE_FORMAT_R32G32B32A32_FLOAT:
return V_0280A0_COLOR_32_32_32_32_FLOAT;
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ return V_0280A0_COLOR_32_32_32_32;
/* YUV buffers. */
case PIPE_FORMAT_UYVY:
@@ -479,9 +498,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
}
}
-static INLINE boolean r600_is_sampler_format_supported(enum pipe_format format)
+static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
{
- return r600_translate_texformat(format, NULL, NULL, NULL) != ~0;
+ return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0;
}
static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format)
@@ -495,21 +514,13 @@ static INLINE boolean r600_is_zs_format_supported(enum pipe_format format)
return r600_translate_dbformat(format) != ~0;
}
-static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format)
+static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format,
+ enum radeon_family family)
{
- return r600_translate_colorformat(format) != ~0;
-}
-
-static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format)
-{
- uint32_t result = 0;
- const struct util_format_description *desc;
unsigned i;
-
- desc = util_format_description(format);
- if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
- goto out_unknown;
- }
+ const struct util_format_description *desc = util_format_description(format);
+ if (!desc)
+ return FALSE;
/* Find the first non-VOID channel. */
for (i = 0; i < 4; i++) {
@@ -517,122 +528,23 @@ static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format)
break;
}
}
-
- switch (desc->channel[i].type) {
- /* Half-floats, floats, doubles */
- case UTIL_FORMAT_TYPE_FLOAT:
- switch (desc->channel[i].size) {
- case 16:
- switch (desc->nr_channels) {
- case 1:
- result = FMT_16_FLOAT;
- break;
- case 2:
- result = FMT_16_16_FLOAT;
- break;
- case 3:
- result = FMT_16_16_16_FLOAT;
- break;
- case 4:
- result = FMT_16_16_16_16_FLOAT;
- break;
- }
- break;
- case 32:
- switch (desc->nr_channels) {
- case 1:
- result = FMT_32_FLOAT;
- break;
- case 2:
- result = FMT_32_32_FLOAT;
- break;
- case 3:
- result = FMT_32_32_32_FLOAT;
- break;
- case 4:
- result = FMT_32_32_32_32_FLOAT;
- break;
- }
- break;
- default:
- goto out_unknown;
- }
- break;
- /* Unsigned ints */
- case UTIL_FORMAT_TYPE_UNSIGNED:
- /* Signed ints */
- case UTIL_FORMAT_TYPE_SIGNED:
- switch (desc->channel[i].size) {
- case 8:
- switch (desc->nr_channels) {
- case 1:
- result = FMT_8;
- break;
- case 2:
- result = FMT_8_8;
- break;
- case 3:
- // result = FMT_8_8_8; /* fails piglit draw-vertices test */
- // break;
- case 4:
- result = FMT_8_8_8_8;
- break;
- }
- break;
- case 16:
- switch (desc->nr_channels) {
- case 1:
- result = FMT_16;
- break;
- case 2:
- result = FMT_16_16;
- break;
- case 3:
- // result = FMT_16_16_16; /* fails piglit draw-vertices test */
- // break;
- case 4:
- result = FMT_16_16_16_16;
- break;
- }
- break;
- case 32:
- switch (desc->nr_channels) {
- case 1:
- result = FMT_32;
- break;
- case 2:
- result = FMT_32_32;
- break;
- case 3:
- result = FMT_32_32_32;
- break;
- case 4:
- result = FMT_32_32_32_32;
- break;
- }
- break;
- default:
- goto out_unknown;
- }
- break;
- default:
- goto out_unknown;
- }
-
- result = S_038008_DATA_FORMAT(result);
-
- if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
- result |= S_038008_FORMAT_COMP_ALL(1);
- }
- if (desc->channel[i].normalized) {
- result |= S_038008_NUM_FORMAT_ALL(0);
- } else {
- result |= S_038008_NUM_FORMAT_ALL(2);
- }
- return result;
-out_unknown:
- R600_ERR("unsupported vertex format %s\n", util_format_name(format));
- return ~0;
+ if (i == 4)
+ return FALSE;
+
+ /* No fixed, no double. */
+ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
+ desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED ||
+ (desc->channel[i].size == 64 &&
+ desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT))
+ return FALSE;
+
+ /* No scaled/norm formats with 32 bits per channel. */
+ if (desc->channel[i].size == 32 &&
+ (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED ||
+ desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED))
+ return FALSE;
+
+ return TRUE;
}
#endif
diff --git a/src/gallium/drivers/r600/r600_states_inc.h b/src/gallium/drivers/r600/r600_states_inc.h
deleted file mode 100644
index 1c8075ebdb..0000000000
--- a/src/gallium/drivers/r600/r600_states_inc.h
+++ /dev/null
@@ -1,543 +0,0 @@
-/* This file is autogenerated from r600_states.h - do not edit directly */
-/* autogenerating script is gen_r600_states.py */
-
-/* R600_CONFIG */
-#define R600_CONFIG__SQ_CONFIG 0
-#define R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1 1
-#define R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2 2
-#define R600_CONFIG__SQ_THREAD_RESOURCE_MGMT 3
-#define R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1 4
-#define R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2 5
-#define R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ 6
-#define R600_CONFIG__TA_CNTL_AUX 7
-#define R600_CONFIG__VC_ENHANCE 8
-#define R600_CONFIG__DB_DEBUG 9
-#define R600_CONFIG__DB_WATERMARKS 10
-#define R600_CONFIG__SX_MISC 11
-#define R600_CONFIG__SPI_THREAD_GROUPING 12
-#define R600_CONFIG__SQ_ESGS_RING_ITEMSIZE 13
-#define R600_CONFIG__SQ_GSVS_RING_ITEMSIZE 14
-#define R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE 15
-#define R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE 16
-#define R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE 17
-#define R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE 18
-#define R600_CONFIG__SQ_FBUF_RING_ITEMSIZE 19
-#define R600_CONFIG__SQ_REDUC_RING_ITEMSIZE 20
-#define R600_CONFIG__SQ_GS_VERT_ITEMSIZE 21
-#define R600_CONFIG__VGT_OUTPUT_PATH_CNTL 22
-#define R600_CONFIG__VGT_HOS_CNTL 23
-#define R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL 24
-#define R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL 25
-#define R600_CONFIG__VGT_HOS_REUSE_DEPTH 26
-#define R600_CONFIG__VGT_GROUP_PRIM_TYPE 27
-#define R600_CONFIG__VGT_GROUP_FIRST_DECR 28
-#define R600_CONFIG__VGT_GROUP_DECR 29
-#define R600_CONFIG__VGT_GROUP_VECT_0_CNTL 30
-#define R600_CONFIG__VGT_GROUP_VECT_1_CNTL 31
-#define R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL 32
-#define R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL 33
-#define R600_CONFIG__VGT_GS_MODE 34
-#define R600_CONFIG__PA_SC_MODE_CNTL 35
-#define R600_CONFIG__VGT_STRMOUT_EN 36
-#define R600_CONFIG__VGT_REUSE_OFF 37
-#define R600_CONFIG__VGT_VTX_CNT_EN 38
-#define R600_CONFIG__VGT_STRMOUT_BUFFER_EN 39
-#define R600_CONFIG_SIZE 40
-#define R600_CONFIG_PM4 128
-
-/* R600_CB_CNTL */
-#define R600_CB_CNTL__CB_CLEAR_RED 0
-#define R600_CB_CNTL__CB_CLEAR_GREEN 1
-#define R600_CB_CNTL__CB_CLEAR_BLUE 2
-#define R600_CB_CNTL__CB_CLEAR_ALPHA 3
-#define R600_CB_CNTL__CB_SHADER_MASK 4
-#define R600_CB_CNTL__CB_TARGET_MASK 5
-#define R600_CB_CNTL__CB_FOG_RED 6
-#define R600_CB_CNTL__CB_FOG_GREEN 7
-#define R600_CB_CNTL__CB_FOG_BLUE 8
-#define R600_CB_CNTL__CB_COLOR_CONTROL 9
-#define R600_CB_CNTL__PA_SC_AA_CONFIG 10
-#define R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX 11
-#define R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX 12
-#define R600_CB_CNTL__CB_CLRCMP_CONTROL 13
-#define R600_CB_CNTL__CB_CLRCMP_SRC 14
-#define R600_CB_CNTL__CB_CLRCMP_DST 15
-#define R600_CB_CNTL__CB_CLRCMP_MSK 16
-#define R600_CB_CNTL__PA_SC_AA_MASK 17
-#define R600_CB_CNTL__CB_SHADER_CONTROL 18
-#define R600_CB_CNTL_SIZE 19
-#define R600_CB_CNTL_PM4 128
-
-/* R600_RASTERIZER */
-#define R600_RASTERIZER__SPI_INTERP_CONTROL_0 0
-#define R600_RASTERIZER__PA_CL_CLIP_CNTL 1
-#define R600_RASTERIZER__PA_SU_SC_MODE_CNTL 2
-#define R600_RASTERIZER__PA_CL_VS_OUT_CNTL 3
-#define R600_RASTERIZER__PA_CL_NANINF_CNTL 4
-#define R600_RASTERIZER__PA_SU_POINT_SIZE 5
-#define R600_RASTERIZER__PA_SU_POINT_MINMAX 6
-#define R600_RASTERIZER__PA_SU_LINE_CNTL 7
-#define R600_RASTERIZER__PA_SC_LINE_STIPPLE 8
-#define R600_RASTERIZER__PA_SC_MPASS_PS_CNTL 9
-#define R600_RASTERIZER__PA_SC_LINE_CNTL 10
-#define R600_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ 11
-#define R600_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ 12
-#define R600_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ 13
-#define R600_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ 14
-#define R600_RASTERIZER__PA_SU_POLY_OFFSET_DB_FMT_CNTL 15
-#define R600_RASTERIZER__PA_SU_POLY_OFFSET_CLAMP 16
-#define R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_SCALE 17
-#define R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET 18
-#define R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE 19
-#define R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET 20
-#define R600_RASTERIZER_SIZE 21
-#define R600_RASTERIZER_PM4 128
-
-/* R600_VIEWPORT */
-#define R600_VIEWPORT__PA_SC_VPORT_ZMIN_0 0
-#define R600_VIEWPORT__PA_SC_VPORT_ZMAX_0 1
-#define R600_VIEWPORT__PA_CL_VPORT_XSCALE_0 2
-#define R600_VIEWPORT__PA_CL_VPORT_YSCALE_0 3
-#define R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0 4
-#define R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0 5
-#define R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0 6
-#define R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0 7
-#define R600_VIEWPORT__PA_CL_VTE_CNTL 8
-#define R600_VIEWPORT_SIZE 9
-#define R600_VIEWPORT_PM4 128
-
-/* R600_SCISSOR */
-#define R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL 0
-#define R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR 1
-#define R600_SCISSOR__PA_SC_WINDOW_OFFSET 2
-#define R600_SCISSOR__PA_SC_WINDOW_SCISSOR_TL 3
-#define R600_SCISSOR__PA_SC_WINDOW_SCISSOR_BR 4
-#define R600_SCISSOR__PA_SC_CLIPRECT_RULE 5
-#define R600_SCISSOR__PA_SC_CLIPRECT_0_TL 6
-#define R600_SCISSOR__PA_SC_CLIPRECT_0_BR 7
-#define R600_SCISSOR__PA_SC_CLIPRECT_1_TL 8
-#define R600_SCISSOR__PA_SC_CLIPRECT_1_BR 9
-#define R600_SCISSOR__PA_SC_CLIPRECT_2_TL 10
-#define R600_SCISSOR__PA_SC_CLIPRECT_2_BR 11
-#define R600_SCISSOR__PA_SC_CLIPRECT_3_TL 12
-#define R600_SCISSOR__PA_SC_CLIPRECT_3_BR 13
-#define R600_SCISSOR__PA_SC_EDGERULE 14
-#define R600_SCISSOR__PA_SC_GENERIC_SCISSOR_TL 15
-#define R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR 16
-#define R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL 17
-#define R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR 18
-#define R600_SCISSOR_SIZE 19
-#define R600_SCISSOR_PM4 128
-
-/* R600_BLEND */
-#define R600_BLEND__CB_BLEND_RED 0
-#define R600_BLEND__CB_BLEND_GREEN 1
-#define R600_BLEND__CB_BLEND_BLUE 2
-#define R600_BLEND__CB_BLEND_ALPHA 3
-#define R600_BLEND__CB_BLEND0_CONTROL 4
-#define R600_BLEND__CB_BLEND1_CONTROL 5
-#define R600_BLEND__CB_BLEND2_CONTROL 6
-#define R600_BLEND__CB_BLEND3_CONTROL 7
-#define R600_BLEND__CB_BLEND4_CONTROL 8
-#define R600_BLEND__CB_BLEND5_CONTROL 9
-#define R600_BLEND__CB_BLEND6_CONTROL 10
-#define R600_BLEND__CB_BLEND7_CONTROL 11
-#define R600_BLEND__CB_BLEND_CONTROL 12
-#define R600_BLEND_SIZE 13
-#define R600_BLEND_PM4 128
-
-/* R600_DSA */
-#define R600_DSA__DB_STENCIL_CLEAR 0
-#define R600_DSA__DB_DEPTH_CLEAR 1
-#define R600_DSA__SX_ALPHA_TEST_CONTROL 2
-#define R600_DSA__DB_STENCILREFMASK 3
-#define R600_DSA__DB_STENCILREFMASK_BF 4
-#define R600_DSA__SX_ALPHA_REF 5
-#define R600_DSA__SPI_FOG_FUNC_SCALE 6
-#define R600_DSA__SPI_FOG_FUNC_BIAS 7
-#define R600_DSA__SPI_FOG_CNTL 8
-#define R600_DSA__DB_DEPTH_CONTROL 9
-#define R600_DSA__DB_SHADER_CONTROL 10
-#define R600_DSA__DB_RENDER_CONTROL 11
-#define R600_DSA__DB_RENDER_OVERRIDE 12
-#define R600_DSA__DB_SRESULTS_COMPARE_STATE1 13
-#define R600_DSA__DB_PRELOAD_CONTROL 14
-#define R600_DSA__DB_ALPHA_TO_MASK 15
-#define R600_DSA_SIZE 16
-#define R600_DSA_PM4 128
-
-/* R600_VS_SHADER */
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_0 0
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_1 1
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_2 2
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_3 3
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_4 4
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_5 5
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_6 6
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_7 7
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_8 8
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_9 9
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_10 10
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_11 11
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_12 12
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_13 13
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_14 14
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_15 15
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_16 16
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_17 17
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_18 18
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_19 19
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_20 20
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_21 21
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_22 22
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_23 23
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_24 24
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_25 25
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_26 26
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_27 27
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_28 28
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_29 29
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_30 30
-#define R600_VS_SHADER__SQ_VTX_SEMANTIC_31 31
-#define R600_VS_SHADER__SPI_VS_OUT_ID_0 32
-#define R600_VS_SHADER__SPI_VS_OUT_ID_1 33
-#define R600_VS_SHADER__SPI_VS_OUT_ID_2 34
-#define R600_VS_SHADER__SPI_VS_OUT_ID_3 35
-#define R600_VS_SHADER__SPI_VS_OUT_ID_4 36
-#define R600_VS_SHADER__SPI_VS_OUT_ID_5 37
-#define R600_VS_SHADER__SPI_VS_OUT_ID_6 38
-#define R600_VS_SHADER__SPI_VS_OUT_ID_7 39
-#define R600_VS_SHADER__SPI_VS_OUT_ID_8 40
-#define R600_VS_SHADER__SPI_VS_OUT_ID_9 41
-#define R600_VS_SHADER__SPI_VS_OUT_CONFIG 42
-#define R600_VS_SHADER__SQ_PGM_START_VS 43
-#define R600_VS_SHADER__SQ_PGM_RESOURCES_VS 44
-#define R600_VS_SHADER__SQ_PGM_START_FS 45
-#define R600_VS_SHADER__SQ_PGM_RESOURCES_FS 46
-#define R600_VS_SHADER__SQ_PGM_CF_OFFSET_VS 47
-#define R600_VS_SHADER__SQ_PGM_CF_OFFSET_FS 48
-#define R600_VS_SHADER_SIZE 49
-#define R600_VS_SHADER_PM4 128
-
-/* R600_PS_SHADER */
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 0
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_1 1
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_2 2
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_3 3
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_4 4
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_5 5
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_6 6
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_7 7
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_8 8
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_9 9
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_10 10
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_11 11
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_12 12
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_13 13
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_14 14
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_15 15
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_16 16
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_17 17
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_18 18
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_19 19
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_20 20
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_21 21
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_22 22
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_23 23
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_24 24
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_25 25
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_26 26
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_27 27
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_28 28
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_29 29
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_30 30
-#define R600_PS_SHADER__SPI_PS_INPUT_CNTL_31 31
-#define R600_PS_SHADER__SPI_PS_IN_CONTROL_0 32
-#define R600_PS_SHADER__SPI_PS_IN_CONTROL_1 33
-#define R600_PS_SHADER__SPI_INPUT_Z 34
-#define R600_PS_SHADER__SQ_PGM_START_PS 35
-#define R600_PS_SHADER__SQ_PGM_RESOURCES_PS 36
-#define R600_PS_SHADER__SQ_PGM_EXPORTS_PS 37
-#define R600_PS_SHADER__SQ_PGM_CF_OFFSET_PS 38
-#define R600_PS_SHADER_SIZE 39
-#define R600_PS_SHADER_PM4 128
-
-/* R600_VS_CBUF */
-#define R600_VS_CBUF__ALU_CONST_BUFFER_SIZE_VS_0 0
-#define R600_VS_CBUF__ALU_CONST_CACHE_VS_0 1
-#define R600_VS_CBUF_SIZE 2
-#define R600_VS_CBUF_PM4 128
-
-/* R600_PS_CBUF */
-#define R600_PS_CBUF__ALU_CONST_BUFFER_SIZE_PS_0 0
-#define R600_PS_CBUF__ALU_CONST_CACHE_PS_0 1
-#define R600_PS_CBUF_SIZE 2
-#define R600_PS_CBUF_PM4 128
-
-/* R600_PS_CONSTANT */
-#define R600_PS_CONSTANT__SQ_ALU_CONSTANT0_0 0
-#define R600_PS_CONSTANT__SQ_ALU_CONSTANT1_0 1
-#define R600_PS_CONSTANT__SQ_ALU_CONSTANT2_0 2
-#define R600_PS_CONSTANT__SQ_ALU_CONSTANT3_0 3
-#define R600_PS_CONSTANT_SIZE 4
-#define R600_PS_CONSTANT_PM4 128
-
-/* R600_VS_CONSTANT */
-#define R600_VS_CONSTANT__SQ_ALU_CONSTANT0_256 0
-#define R600_VS_CONSTANT__SQ_ALU_CONSTANT1_256 1
-#define R600_VS_CONSTANT__SQ_ALU_CONSTANT2_256 2
-#define R600_VS_CONSTANT__SQ_ALU_CONSTANT3_256 3
-#define R600_VS_CONSTANT_SIZE 4
-#define R600_VS_CONSTANT_PM4 128
-
-/* R600_UCP */
-#define R600_UCP__PA_CL_UCP0_X 0
-#define R600_UCP__PA_CL_UCP0_Y 1
-#define R600_UCP__PA_CL_UCP0_Z 2
-#define R600_UCP__PA_CL_UCP0_W 3
-#define R600_UCP__PA_CL_UCP1_X 4
-#define R600_UCP__PA_CL_UCP1_Y 5
-#define R600_UCP__PA_CL_UCP1_Z 6
-#define R600_UCP__PA_CL_UCP1_W 7
-#define R600_UCP__PA_CL_UCP2_X 8
-#define R600_UCP__PA_CL_UCP2_Y 9
-#define R600_UCP__PA_CL_UCP2_Z 10
-#define R600_UCP__PA_CL_UCP2_W 11
-#define R600_UCP__PA_CL_UCP3_X 12
-#define R600_UCP__PA_CL_UCP3_Y 13
-#define R600_UCP__PA_CL_UCP3_Z 14
-#define R600_UCP__PA_CL_UCP3_W 15
-#define R600_UCP__PA_CL_UCP4_X 16
-#define R600_UCP__PA_CL_UCP4_Y 17
-#define R600_UCP__PA_CL_UCP4_Z 18
-#define R600_UCP__PA_CL_UCP4_W 19
-#define R600_UCP__PA_CL_UCP5_X 20
-#define R600_UCP__PA_CL_UCP5_Y 21
-#define R600_UCP__PA_CL_UCP5_Z 22
-#define R600_UCP__PA_CL_UCP5_W 23
-#define R600_UCP_SIZE 24
-#define R600_UCP_PM4 128
-
-/* R600_PS_RESOURCE */
-#define R600_PS_RESOURCE__RESOURCE0_WORD0 0
-#define R600_PS_RESOURCE__RESOURCE0_WORD1 1
-#define R600_PS_RESOURCE__RESOURCE0_WORD2 2
-#define R600_PS_RESOURCE__RESOURCE0_WORD3 3
-#define R600_PS_RESOURCE__RESOURCE0_WORD4 4
-#define R600_PS_RESOURCE__RESOURCE0_WORD5 5
-#define R600_PS_RESOURCE__RESOURCE0_WORD6 6
-#define R600_PS_RESOURCE_SIZE 7
-#define R600_PS_RESOURCE_PM4 128
-
-/* R600_VS_RESOURCE */
-#define R600_VS_RESOURCE__RESOURCE160_WORD0 0
-#define R600_VS_RESOURCE__RESOURCE160_WORD1 1
-#define R600_VS_RESOURCE__RESOURCE160_WORD2 2
-#define R600_VS_RESOURCE__RESOURCE160_WORD3 3
-#define R600_VS_RESOURCE__RESOURCE160_WORD4 4
-#define R600_VS_RESOURCE__RESOURCE160_WORD5 5
-#define R600_VS_RESOURCE__RESOURCE160_WORD6 6
-#define R600_VS_RESOURCE_SIZE 7
-#define R600_VS_RESOURCE_PM4 128
-
-/* R600_FS_RESOURCE */
-#define R600_FS_RESOURCE__RESOURCE320_WORD0 0
-#define R600_FS_RESOURCE__RESOURCE320_WORD1 1
-#define R600_FS_RESOURCE__RESOURCE320_WORD2 2
-#define R600_FS_RESOURCE__RESOURCE320_WORD3 3
-#define R600_FS_RESOURCE__RESOURCE320_WORD4 4
-#define R600_FS_RESOURCE__RESOURCE320_WORD5 5
-#define R600_FS_RESOURCE__RESOURCE320_WORD6 6
-#define R600_FS_RESOURCE_SIZE 7
-#define R600_FS_RESOURCE_PM4 128
-
-/* R600_GS_RESOURCE */
-#define R600_GS_RESOURCE__RESOURCE336_WORD0 0
-#define R600_GS_RESOURCE__RESOURCE336_WORD1 1
-#define R600_GS_RESOURCE__RESOURCE336_WORD2 2
-#define R600_GS_RESOURCE__RESOURCE336_WORD3 3
-#define R600_GS_RESOURCE__RESOURCE336_WORD4 4
-#define R600_GS_RESOURCE__RESOURCE336_WORD5 5
-#define R600_GS_RESOURCE__RESOURCE336_WORD6 6
-#define R600_GS_RESOURCE_SIZE 7
-#define R600_GS_RESOURCE_PM4 128
-
-/* R600_PS_SAMPLER */
-#define R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0 0
-#define R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0 1
-#define R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0 2
-#define R600_PS_SAMPLER_SIZE 3
-#define R600_PS_SAMPLER_PM4 128
-
-/* R600_VS_SAMPLER */
-#define R600_VS_SAMPLER__SQ_TEX_SAMPLER_WORD0_18 0
-#define R600_VS_SAMPLER__SQ_TEX_SAMPLER_WORD1_18 1
-#define R600_VS_SAMPLER__SQ_TEX_SAMPLER_WORD2_18 2
-#define R600_VS_SAMPLER_SIZE 3
-#define R600_VS_SAMPLER_PM4 128
-
-/* R600_GS_SAMPLER */
-#define R600_GS_SAMPLER__SQ_TEX_SAMPLER_WORD0_36 0
-#define R600_GS_SAMPLER__SQ_TEX_SAMPLER_WORD1_36 1
-#define R600_GS_SAMPLER__SQ_TEX_SAMPLER_WORD2_36 2
-#define R600_GS_SAMPLER_SIZE 3
-#define R600_GS_SAMPLER_PM4 128
-
-/* R600_PS_SAMPLER_BORDER */
-#define R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_RED 0
-#define R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_GREEN 1
-#define R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_BLUE 2
-#define R600_PS_SAMPLER_BORDER__TD_PS_SAMPLER0_BORDER_ALPHA 3
-#define R600_PS_SAMPLER_BORDER_SIZE 4
-#define R600_PS_SAMPLER_BORDER_PM4 128
-
-/* R600_VS_SAMPLER_BORDER */
-#define R600_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_RED 0
-#define R600_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_GREEN 1
-#define R600_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_BLUE 2
-#define R600_VS_SAMPLER_BORDER__TD_VS_SAMPLER0_BORDER_ALPHA 3
-#define R600_VS_SAMPLER_BORDER_SIZE 4
-#define R600_VS_SAMPLER_BORDER_PM4 128
-
-/* R600_GS_SAMPLER_BORDER */
-#define R600_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_RED 0
-#define R600_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_GREEN 1
-#define R600_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_BLUE 2
-#define R600_GS_SAMPLER_BORDER__TD_GS_SAMPLER0_BORDER_ALPHA 3
-#define R600_GS_SAMPLER_BORDER_SIZE 4
-#define R600_GS_SAMPLER_BORDER_PM4 128
-
-/* R600_CB0 */
-#define R600_CB0__CB_COLOR0_BASE 0
-#define R600_CB0__CB_COLOR0_INFO 1
-#define R600_CB0__CB_COLOR0_SIZE 2
-#define R600_CB0__CB_COLOR0_VIEW 3
-#define R600_CB0__CB_COLOR0_FRAG 4
-#define R600_CB0__CB_COLOR0_TILE 5
-#define R600_CB0__CB_COLOR0_MASK 6
-#define R600_CB0_SIZE 7
-#define R600_CB0_PM4 128
-
-/* R600_CB1 */
-#define R600_CB1__CB_COLOR1_BASE 0
-#define R600_CB1__CB_COLOR1_INFO 1
-#define R600_CB1__CB_COLOR1_SIZE 2
-#define R600_CB1__CB_COLOR1_VIEW 3
-#define R600_CB1__CB_COLOR1_FRAG 4
-#define R600_CB1__CB_COLOR1_TILE 5
-#define R600_CB1__CB_COLOR1_MASK 6
-#define R600_CB1_SIZE 7
-#define R600_CB1_PM4 128
-
-/* R600_CB2 */
-#define R600_CB2__CB_COLOR2_BASE 0
-#define R600_CB2__CB_COLOR2_INFO 1
-#define R600_CB2__CB_COLOR2_SIZE 2
-#define R600_CB2__CB_COLOR2_VIEW 3
-#define R600_CB2__CB_COLOR2_FRAG 4
-#define R600_CB2__CB_COLOR2_TILE 5
-#define R600_CB2__CB_COLOR2_MASK 6
-#define R600_CB2_SIZE 7
-#define R600_CB2_PM4 128
-
-/* R600_CB3 */
-#define R600_CB3__CB_COLOR3_BASE 0
-#define R600_CB3__CB_COLOR3_INFO 1
-#define R600_CB3__CB_COLOR3_SIZE 2
-#define R600_CB3__CB_COLOR3_VIEW 3
-#define R600_CB3__CB_COLOR3_FRAG 4
-#define R600_CB3__CB_COLOR3_TILE 5
-#define R600_CB3__CB_COLOR3_MASK 6
-#define R600_CB3_SIZE 7
-#define R600_CB3_PM4 128
-
-/* R600_CB4 */
-#define R600_CB4__CB_COLOR4_BASE 0
-#define R600_CB4__CB_COLOR4_INFO 1
-#define R600_CB4__CB_COLOR4_SIZE 2
-#define R600_CB4__CB_COLOR4_VIEW 3
-#define R600_CB4__CB_COLOR4_FRAG 4
-#define R600_CB4__CB_COLOR4_TILE 5
-#define R600_CB4__CB_COLOR4_MASK 6
-#define R600_CB4_SIZE 7
-#define R600_CB4_PM4 128
-
-/* R600_CB5 */
-#define R600_CB5__CB_COLOR5_BASE 0
-#define R600_CB5__CB_COLOR5_INFO 1
-#define R600_CB5__CB_COLOR5_SIZE 2
-#define R600_CB5__CB_COLOR5_VIEW 3
-#define R600_CB5__CB_COLOR5_FRAG 4
-#define R600_CB5__CB_COLOR5_TILE 5
-#define R600_CB5__CB_COLOR5_MASK 6
-#define R600_CB5_SIZE 7
-#define R600_CB5_PM4 128
-
-/* R600_CB6 */
-#define R600_CB6__CB_COLOR6_BASE 0
-#define R600_CB6__CB_COLOR6_INFO 1
-#define R600_CB6__CB_COLOR6_SIZE 2
-#define R600_CB6__CB_COLOR6_VIEW 3
-#define R600_CB6__CB_COLOR6_FRAG 4
-#define R600_CB6__CB_COLOR6_TILE 5
-#define R600_CB6__CB_COLOR6_MASK 6
-#define R600_CB6_SIZE 7
-#define R600_CB6_PM4 128
-
-/* R600_CB7 */
-#define R600_CB7__CB_COLOR7_BASE 0
-#define R600_CB7__CB_COLOR7_INFO 1
-#define R600_CB7__CB_COLOR7_SIZE 2
-#define R600_CB7__CB_COLOR7_VIEW 3
-#define R600_CB7__CB_COLOR7_FRAG 4
-#define R600_CB7__CB_COLOR7_TILE 5
-#define R600_CB7__CB_COLOR7_MASK 6
-#define R600_CB7_SIZE 7
-#define R600_CB7_PM4 128
-
-/* R600_DB */
-#define R600_DB__DB_DEPTH_BASE 0
-#define R600_DB__DB_DEPTH_SIZE 1
-#define R600_DB__DB_DEPTH_VIEW 2
-#define R600_DB__DB_DEPTH_INFO 3
-#define R600_DB__DB_HTILE_SURFACE 4
-#define R600_DB__DB_PREFETCH_LIMIT 5
-#define R600_DB_SIZE 6
-#define R600_DB_PM4 128
-
-/* R600_VGT */
-#define R600_VGT__VGT_PRIMITIVE_TYPE 0
-#define R600_VGT__VGT_MAX_VTX_INDX 1
-#define R600_VGT__VGT_MIN_VTX_INDX 2
-#define R600_VGT__VGT_INDX_OFFSET 3
-#define R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX 4
-#define R600_VGT__VGT_DMA_INDEX_TYPE 5
-#define R600_VGT__VGT_PRIMITIVEID_EN 6
-#define R600_VGT__VGT_DMA_NUM_INSTANCES 7
-#define R600_VGT__VGT_MULTI_PRIM_IB_RESET_EN 8
-#define R600_VGT__VGT_INSTANCE_STEP_RATE_0 9
-#define R600_VGT__VGT_INSTANCE_STEP_RATE_1 10
-#define R600_VGT_SIZE 11
-#define R600_VGT_PM4 128
-
-/* R600_DRAW */
-#define R600_DRAW__VGT_NUM_INDICES 0
-#define R600_DRAW__VGT_DMA_BASE_HI 1
-#define R600_DRAW__VGT_DMA_BASE 2
-#define R600_DRAW__VGT_DRAW_INITIATOR 3
-#define R600_DRAW_SIZE 4
-#define R600_DRAW_PM4 128
-
-/* R600_VGT_EVENT */
-#define R600_VGT_EVENT__VGT_EVENT_INITIATOR 0
-#define R600_VGT_EVENT_SIZE 1
-#define R600_VGT_EVENT_PM4 128
-
-/* R600_CB_FLUSH */
-#define R600_CB_FLUSH_SIZE 0
-#define R600_CB_FLUSH_PM4 128
-
-/* R600_DB_FLUSH */
-#define R600_DB_FLUSH_SIZE 0
-#define R600_DB_FLUSH_PM4 128
-
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index d4d9b07c0e..dc351bfb62 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -27,6 +27,7 @@
#include <errno.h>
#include <pipe/p_screen.h>
#include <util/u_format.h>
+#include <util/u_format_s3tc.h>
#include <util/u_math.h>
#include <util/u_inlines.h>
#include <util/u_memory.h>
@@ -38,8 +39,6 @@
#include "r600d.h"
#include "r600_formats.h"
-extern struct u_resource_vtbl r600_texture_vtbl;
-
/* Copy from a full GPU texture to a transfer's staging one. */
static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
{
@@ -69,7 +68,7 @@ static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600
rtransfer->staging_texture,
0, &sbox);
- ctx->flush(ctx, 0, NULL);
+ ctx->flush(ctx, NULL);
}
unsigned r600_texture_get_offset(struct r600_resource_texture *rtex,
@@ -77,17 +76,15 @@ unsigned r600_texture_get_offset(struct r600_resource_texture *rtex,
{
unsigned offset = rtex->offset[level];
- switch (rtex->resource.base.b.target) {
+ switch (rtex->resource.b.b.b.target) {
case PIPE_TEXTURE_3D:
case PIPE_TEXTURE_CUBE:
- return offset + layer * rtex->layer_size[level];
default:
- assert(layer == 0);
- return offset;
+ return offset + layer * rtex->layer_size[level];
}
}
-static unsigned r600_get_pixel_alignment(struct pipe_screen *screen,
+static unsigned r600_get_block_alignment(struct pipe_screen *screen,
enum pipe_format format,
unsigned array_mode)
{
@@ -105,6 +102,9 @@ static unsigned r600_get_pixel_alignment(struct pipe_screen *screen,
(((rscreen->tiling_info->group_bytes / 8 / pixsize)) *
rscreen->tiling_info->num_banks)) * 8;
break;
+ case V_038000_ARRAY_LINEAR_ALIGNED:
+ p_align = MAX2(64, rscreen->tiling_info->group_bytes / pixsize);
+ break;
case V_038000_ARRAY_LINEAR_GENERAL:
default:
p_align = rscreen->tiling_info->group_bytes / pixsize;
@@ -124,8 +124,10 @@ static unsigned r600_get_height_alignment(struct pipe_screen *screen,
h_align = rscreen->tiling_info->num_channels * 8;
break;
case V_038000_ARRAY_1D_TILED_THIN1:
+ case V_038000_ARRAY_LINEAR_ALIGNED:
h_align = 8;
break;
+ case V_038000_ARRAY_LINEAR_GENERAL:
default:
h_align = 1;
break;
@@ -139,7 +141,7 @@ static unsigned r600_get_base_alignment(struct pipe_screen *screen,
{
struct r600_screen* rscreen = (struct r600_screen *)screen;
unsigned pixsize = util_format_get_blocksize(format);
- int p_align = r600_get_pixel_alignment(screen, format, array_mode);
+ int p_align = r600_get_block_alignment(screen, format, array_mode);
int h_align = r600_get_height_alignment(screen, array_mode);
int b_align;
@@ -149,6 +151,8 @@ static unsigned r600_get_base_alignment(struct pipe_screen *screen,
p_align * pixsize * h_align);
break;
case V_038000_ARRAY_1D_TILED_THIN1:
+ case V_038000_ARRAY_LINEAR_ALIGNED:
+ case V_038000_ARRAY_LINEAR_GENERAL:
default:
b_align = rscreen->tiling_info->group_bytes;
break;
@@ -165,55 +169,46 @@ static unsigned mip_minify(unsigned size, unsigned level)
return val;
}
-static unsigned r600_texture_get_stride(struct pipe_screen *screen,
- struct r600_resource_texture *rtex,
- unsigned level)
+static unsigned r600_texture_get_nblocksx(struct pipe_screen *screen,
+ struct r600_resource_texture *rtex,
+ unsigned level)
{
- struct pipe_resource *ptex = &rtex->resource.base.b;
- unsigned width, stride, tile_width;
+ struct pipe_resource *ptex = &rtex->resource.b.b.b;
+ unsigned nblocksx, block_align, width;
+ unsigned blocksize = util_format_get_blocksize(ptex->format);
if (rtex->pitch_override)
- return rtex->pitch_override;
+ return rtex->pitch_override / blocksize;
width = mip_minify(ptex->width0, level);
- if (util_format_is_plain(ptex->format)) {
- tile_width = r600_get_pixel_alignment(screen, ptex->format,
- rtex->array_mode[level]);
- width = align(width, tile_width);
- }
- stride = util_format_get_stride(ptex->format, width);
+ nblocksx = util_format_get_nblocksx(ptex->format, width);
- return stride;
+ block_align = r600_get_block_alignment(screen, ptex->format,
+ rtex->array_mode[level]);
+ nblocksx = align(nblocksx, block_align);
+ return nblocksx;
}
static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen,
struct r600_resource_texture *rtex,
unsigned level)
{
- struct pipe_resource *ptex = &rtex->resource.base.b;
+ struct pipe_resource *ptex = &rtex->resource.b.b.b;
unsigned height, tile_height;
height = mip_minify(ptex->height0, level);
- if (util_format_is_plain(ptex->format)) {
- tile_height = r600_get_height_alignment(screen,
- rtex->array_mode[level]);
- height = align(height, tile_height);
- }
- return util_format_get_nblocksy(ptex->format, height);
-}
-
-/* Get a width in pixels from a stride in bytes. */
-static unsigned pitch_to_width(enum pipe_format format, unsigned pitch_in_bytes)
-{
- return (pitch_in_bytes / util_format_get_blocksize(format)) *
- util_format_get_blockwidth(format);
+ height = util_format_get_nblocksy(ptex->format, height);
+ tile_height = r600_get_height_alignment(screen,
+ rtex->array_mode[level]);
+ height = align(height, tile_height);
+ return height;
}
static void r600_texture_set_array_mode(struct pipe_screen *screen,
struct r600_resource_texture *rtex,
unsigned level, unsigned array_mode)
{
- struct pipe_resource *ptex = &rtex->resource.base.b;
+ struct pipe_resource *ptex = &rtex->resource.b.b.b;
switch (array_mode) {
case V_0280A0_ARRAY_LINEAR_GENERAL:
@@ -227,11 +222,11 @@ static void r600_texture_set_array_mode(struct pipe_screen *screen,
unsigned w, h, tile_height, tile_width;
tile_height = r600_get_height_alignment(screen, array_mode);
- tile_width = r600_get_pixel_alignment(screen, ptex->format, array_mode);
+ tile_width = r600_get_block_alignment(screen, ptex->format, array_mode);
w = mip_minify(ptex->width0, level);
h = mip_minify(ptex->height0, level);
- if (w < tile_width || h < tile_height)
+ if (w <= tile_width || h <= tile_height)
rtex->array_mode[level] = V_0280A0_ARRAY_1D_TILED_THIN1;
else
rtex->array_mode[level] = array_mode;
@@ -244,40 +239,119 @@ static void r600_setup_miptree(struct pipe_screen *screen,
struct r600_resource_texture *rtex,
unsigned array_mode)
{
- struct pipe_resource *ptex = &rtex->resource.base.b;
+ struct pipe_resource *ptex = &rtex->resource.b.b.b;
struct radeon *radeon = (struct radeon *)screen->winsys;
enum chip_class chipc = r600_get_family_class(radeon);
- unsigned pitch, size, layer_size, i, offset;
- unsigned nblocksy;
+ unsigned size, layer_size, i, offset;
+ unsigned nblocksx, nblocksy;
for (i = 0, offset = 0; i <= ptex->last_level; i++) {
+ unsigned blocksize = util_format_get_blocksize(ptex->format);
+
r600_texture_set_array_mode(screen, rtex, i, array_mode);
- pitch = r600_texture_get_stride(screen, rtex, i);
+ nblocksx = r600_texture_get_nblocksx(screen, rtex, i);
nblocksy = r600_texture_get_nblocksy(screen, rtex, i);
- layer_size = pitch * nblocksy;
-
+ layer_size = nblocksx * nblocksy * blocksize;
if (ptex->target == PIPE_TEXTURE_CUBE) {
if (chipc >= R700)
size = layer_size * 8;
else
size = layer_size * 6;
}
- else
+ else if (ptex->target == PIPE_TEXTURE_3D)
size = layer_size * u_minify(ptex->depth0, i);
+ else
+ size = layer_size * ptex->array_size;
+
/* align base image and start of miptree */
if ((i == 0) || (i == 1))
offset = align(offset, r600_get_base_alignment(screen, ptex->format, array_mode));
rtex->offset[i] = offset;
rtex->layer_size[i] = layer_size;
- rtex->pitch_in_bytes[i] = pitch;
- rtex->pitch_in_pixels[i] = pitch_to_width(ptex->format, pitch);
+ rtex->pitch_in_blocks[i] = nblocksx; /* CB talks in elements */
+ rtex->pitch_in_bytes[i] = nblocksx * blocksize;
+
offset += size;
}
rtex->size = offset;
}
+/* Figure out whether u_blitter will fallback to a transfer operation.
+ * If so, don't use a staging resource.
+ */
+static boolean permit_hardware_blit(struct pipe_screen *screen,
+ const struct pipe_resource *res)
+{
+ unsigned bind;
+
+ if (util_format_is_depth_or_stencil(res->format))
+ bind = PIPE_BIND_DEPTH_STENCIL;
+ else
+ bind = PIPE_BIND_RENDER_TARGET;
+
+ /* hackaround for S3TC */
+ if (util_format_is_compressed(res->format))
+ return TRUE;
+
+ if (!screen->is_format_supported(screen,
+ res->format,
+ res->target,
+ res->nr_samples,
+ bind))
+ return FALSE;
+
+ if (!screen->is_format_supported(screen,
+ res->format,
+ res->target,
+ res->nr_samples,
+ PIPE_BIND_SAMPLER_VIEW))
+ return FALSE;
+
+ return TRUE;
+}
+
+static boolean r600_texture_get_handle(struct pipe_screen* screen,
+ struct pipe_resource *ptex,
+ struct winsys_handle *whandle)
+{
+ struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex;
+ struct r600_resource *resource = &rtex->resource;
+ struct radeon *radeon = (struct radeon *)screen->winsys;
+
+ return r600_bo_get_winsys_handle(radeon, resource->bo,
+ rtex->pitch_in_bytes[0], whandle);
+}
+
+static void r600_texture_destroy(struct pipe_screen *screen,
+ struct pipe_resource *ptex)
+{
+ struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex;
+ struct r600_resource *resource = &rtex->resource;
+ struct radeon *radeon = (struct radeon *)screen->winsys;
+
+ if (rtex->flushed_depth_texture)
+ pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL);
+
+ if (resource->bo) {
+ r600_bo_reference(radeon, &resource->bo, NULL);
+ }
+ FREE(rtex);
+}
+
+static const struct u_resource_vtbl r600_texture_vtbl =
+{
+ r600_texture_get_handle, /* get_handle */
+ r600_texture_destroy, /* resource_destroy */
+ r600_texture_get_transfer, /* get_transfer */
+ r600_texture_transfer_destroy, /* transfer_destroy */
+ r600_texture_transfer_map, /* transfer_map */
+ u_default_transfer_flush_region,/* transfer_flush_region */
+ r600_texture_transfer_unmap, /* transfer_unmap */
+ u_default_transfer_inline_write /* transfer_inline_write */
+};
+
static struct r600_resource_texture *
r600_texture_create_object(struct pipe_screen *screen,
const struct pipe_resource *base,
@@ -295,21 +369,22 @@ r600_texture_create_object(struct pipe_screen *screen,
return NULL;
resource = &rtex->resource;
- resource->base.b = *base;
- resource->base.vtbl = &r600_texture_vtbl;
- pipe_reference_init(&resource->base.b.reference, 1);
- resource->base.b.screen = screen;
+ resource->b.b.b = *base;
+ resource->b.b.vtbl = &r600_texture_vtbl;
+ pipe_reference_init(&resource->b.b.b.reference, 1);
+ resource->b.b.b.screen = screen;
resource->bo = bo;
rtex->pitch_override = pitch_in_bytes_override;
+ /* only mark depth textures the HW can hit as depth textures */
+ if (util_format_is_depth_or_stencil(base->format) && permit_hardware_blit(screen, base))
+ rtex->depth = 1;
- if (array_mode)
- rtex->tiled = 1;
r600_setup_miptree(screen, rtex, array_mode);
resource->size = rtex->size;
if (!resource->bo) {
- struct pipe_resource *ptex = &rtex->resource.base.b;
+ struct pipe_resource *ptex = &rtex->resource.b.b.b;
int base_align = r600_get_base_alignment(screen, ptex->format, array_mode);
resource->bo = r600_bo(radeon, rtex->size, base_align, base->bind, base->usage);
@@ -329,56 +404,38 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
/* Would like some magic "get_bool_option_once" routine.
*/
- if (force_tiling == -1)
- force_tiling = debug_get_bool_option("R600_FORCE_TILING", FALSE);
+ if (force_tiling == -1) {
+#if 0
+ /* reenable when 2D tiling is fixed better */
+ struct r600_screen *rscreen = (struct r600_screen *)screen;
+ if (r600_get_minor_version(rscreen->radeon) >= 9)
+ force_tiling = debug_get_bool_option("R600_TILING", TRUE);
+#endif
+ force_tiling = debug_get_bool_option("R600_TILING", FALSE);
+ }
- if (force_tiling) {
+ if (force_tiling && permit_hardware_blit(screen, templ)) {
if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) &&
!(templ->bind & PIPE_BIND_SCANOUT)) {
array_mode = V_038000_ARRAY_2D_TILED_THIN1;
}
}
+ if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) &&
+ util_format_is_compressed(templ->format))
+ array_mode = V_038000_ARRAY_1D_TILED_THIN1;
+
return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode,
0, 0, NULL);
}
-static void r600_texture_destroy(struct pipe_screen *screen,
- struct pipe_resource *ptex)
-{
- struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex;
- struct r600_resource *resource = &rtex->resource;
- struct radeon *radeon = (struct radeon *)screen->winsys;
-
- if (rtex->flushed_depth_texture)
- pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL);
-
- if (resource->bo) {
- r600_bo_reference(radeon, &resource->bo, NULL);
- }
- FREE(rtex);
-}
-
-static boolean r600_texture_get_handle(struct pipe_screen* screen,
- struct pipe_resource *ptex,
- struct winsys_handle *whandle)
-{
- struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex;
- struct r600_resource *resource = &rtex->resource;
- struct radeon *radeon = (struct radeon *)screen->winsys;
-
- return r600_bo_get_winsys_handle(radeon, resource->bo,
- rtex->pitch_in_bytes[0], whandle);
-}
-
static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
struct pipe_resource *texture,
const struct pipe_surface *surf_tmpl)
{
struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture;
struct r600_surface *surface = CALLOC_STRUCT(r600_surface);
- unsigned tile_height;
unsigned level = surf_tmpl->u.tex.level;
assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
@@ -398,8 +455,8 @@ static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
surface->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer;
surface->base.u.tex.level = level;
- tile_height = r600_get_height_alignment(pipe->screen, rtex->array_mode[level]);
- surface->aligned_height = align(surface->base.height, tile_height);
+ surface->aligned_height = r600_texture_get_nblocksy(pipe->screen,
+ rtex, level);
return &surface->base;
}
@@ -435,18 +492,8 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
bo);
}
-static unsigned int r600_texture_is_referenced(struct pipe_context *context,
- struct pipe_resource *texture,
- unsigned level, int layer)
-{
- /* FIXME */
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
-
-int (*r600_blit_uncompress_depth_ptr)(struct pipe_context *ctx, struct r600_resource_texture *texture);
-
int r600_texture_depth_flush(struct pipe_context *ctx,
- struct pipe_resource *texture)
+ struct pipe_resource *texture, boolean just_create)
{
struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture;
struct pipe_resource resource;
@@ -459,7 +506,8 @@ int r600_texture_depth_flush(struct pipe_context *ctx,
resource.width0 = texture->width0;
resource.height0 = texture->height0;
resource.depth0 = 1;
- resource.last_level = 0;
+ resource.array_size = 1;
+ resource.last_level = texture->last_level;
resource.nr_samples = 0;
resource.usage = PIPE_USAGE_DYNAMIC;
resource.bind = 0;
@@ -473,10 +521,14 @@ int r600_texture_depth_flush(struct pipe_context *ctx,
return -ENOMEM;
}
+ ((struct r600_resource_texture *)rtex->flushed_depth_texture)->is_flushing_texture = TRUE;
out:
+ if (just_create)
+ return 0;
+
/* XXX: only do this if the depth texture has actually changed:
*/
- r600_blit_uncompress_depth_ptr(ctx, rtex);
+ r600_blit_uncompress_depth(ctx, rtex);
return 0;
}
@@ -487,46 +539,6 @@ static INLINE unsigned u_box_volume( const struct pipe_box *box )
return box->width * box->depth * box->height;
};
-
-/* Figure out whether u_blitter will fallback to a transfer operation.
- * If so, don't use a staging resource.
- */
-static boolean permit_hardware_blit(struct pipe_screen *screen,
- struct pipe_resource *res)
-{
- unsigned bind;
-
- if (util_format_is_depth_or_stencil(res->format))
- bind = PIPE_BIND_DEPTH_STENCIL;
- else
- bind = PIPE_BIND_RENDER_TARGET;
-
- /* See r600_resource_copy_region: there is something wrong
- * with depth resource copies at the moment so avoid them for
- * now.
- */
- if (util_format_get_component_bits(res->format,
- UTIL_FORMAT_COLORSPACE_ZS,
- 0) != 0)
- return FALSE;
-
- if (!screen->is_format_supported(screen,
- res->format,
- res->target,
- res->nr_samples,
- bind, 0))
- return FALSE;
-
- if (!screen->is_format_supported(screen,
- res->format,
- res->target,
- res->nr_samples,
- PIPE_BIND_SAMPLER_VIEW, 0))
- return FALSE;
-
- return TRUE;
-}
-
struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
struct pipe_resource *texture,
unsigned level,
@@ -546,7 +558,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
* the CPU is much happier reading out of cached system memory
* than uncached VRAM.
*/
- if (rtex->tiled)
+ if (R600_TEX_IS_TILED(rtex, level))
use_staging_texture = TRUE;
if ((usage & PIPE_TRANSFER_READ) && u_box_volume(box) > 1024)
@@ -579,13 +591,16 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
*/
/* XXX: when discard is true, no need to read back from depth texture
*/
- r = r600_texture_depth_flush(ctx, texture);
+ r = r600_texture_depth_flush(ctx, texture, FALSE);
if (r < 0) {
R600_ERR("failed to create temporary texture to hold untiled copy\n");
pipe_resource_reference(&trans->transfer.resource, NULL);
FREE(trans);
return NULL;
}
+ trans->transfer.stride = rtex->flushed_depth_texture->pitch_in_bytes[level];
+ trans->offset = r600_texture_get_offset(rtex->flushed_depth_texture, level, box->z);
+ return &trans->transfer;
} else if (use_staging_texture) {
resource.target = PIPE_TEXTURE_2D;
resource.format = texture->format;
@@ -622,11 +637,12 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
if (usage & PIPE_TRANSFER_READ) {
r600_copy_to_staging_texture(ctx, trans);
/* Always referenced in the blit. */
- ctx->flush(ctx, 0, NULL);
+ ctx->flush(ctx, NULL);
}
return &trans->transfer;
}
trans->transfer.stride = rtex->pitch_in_bytes[level];
+ trans->transfer.layer_stride = rtex->layer_size[level];
trans->offset = r600_texture_get_offset(rtex, level, box->z);
return &trans->transfer;
}
@@ -635,7 +651,8 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx,
struct pipe_transfer *transfer)
{
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
- struct r600_resource_texture *rtex = (struct r600_resource_texture*)transfer->resource;
+ struct pipe_resource *texture = transfer->resource;
+ struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture;
if (rtransfer->staging_texture) {
if (transfer->usage & PIPE_TRANSFER_WRITE) {
@@ -643,9 +660,12 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx,
}
pipe_resource_reference(&rtransfer->staging_texture, NULL);
}
- if (rtex->flushed_depth_texture) {
- pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL);
+
+ if (rtex->depth && !rtex->is_flushing_texture) {
+ if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtex->flushed_depth_texture)
+ r600_blit_push_depth(ctx, rtex);
}
+
pipe_resource_reference(&transfer->resource, NULL);
FREE(transfer);
}
@@ -727,19 +747,6 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx,
r600_bo_unmap(radeon, bo);
}
-struct u_resource_vtbl r600_texture_vtbl =
-{
- r600_texture_get_handle, /* get_handle */
- r600_texture_destroy, /* resource_destroy */
- r600_texture_is_referenced, /* is_resource_referenced */
- r600_texture_get_transfer, /* get_transfer */
- r600_texture_transfer_destroy, /* transfer_destroy */
- r600_texture_transfer_map, /* transfer_map */
- u_default_transfer_flush_region,/* transfer_flush_region */
- r600_texture_transfer_unmap, /* transfer_unmap */
- u_default_transfer_inline_write /* transfer_inline_write */
-};
-
void r600_init_surface_functions(struct r600_pipe_context *r600)
{
r600->context.create_surface = r600_create_surface;
@@ -795,13 +802,16 @@ static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
}
/* texture format translate */
-uint32_t r600_translate_texformat(enum pipe_format format,
- const unsigned char *swizzle_view,
+uint32_t r600_translate_texformat(struct pipe_screen *screen,
+ enum pipe_format format,
+ const unsigned char *swizzle_view,
uint32_t *word4_p, uint32_t *yuv_format_p)
{
uint32_t result = 0, word4 = 0, yuv_format = 0;
const struct util_format_description *desc;
boolean uniform = TRUE;
+ static int r600_enable_s3tc = -1;
+
int i;
const uint32_t sign_bit[4] = {
S_038010_FORMAT_COMP_X(V_038010_SQ_FORMAT_COMP_SIGNED),
@@ -850,37 +860,65 @@ uint32_t r600_translate_texformat(enum pipe_format format,
break;
}
goto out_unknown; /* TODO */
-
+
case UTIL_FORMAT_COLORSPACE_SRGB:
word4 |= S_038010_FORCE_DEGAMMA(1);
- if (format == PIPE_FORMAT_L8A8_SRGB || format == PIPE_FORMAT_L8_SRGB)
- goto out_unknown; /* fails for some reason - TODO */
break;
default:
break;
}
- /* S3TC formats. TODO */
- if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
- static int r600_enable_s3tc = -1;
+ if (r600_enable_s3tc == -1) {
+ struct r600_screen *rscreen = (struct r600_screen *)screen;
+ if (r600_get_minor_version(rscreen->radeon) >= 9)
+ r600_enable_s3tc = 1;
+ else
+ r600_enable_s3tc = debug_get_bool_option("R600_ENABLE_S3TC", FALSE);
+ }
+
+ if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
+ if (!r600_enable_s3tc)
+ goto out_unknown;
- if (r600_enable_s3tc == -1)
- r600_enable_s3tc =
- debug_get_bool_option("R600_ENABLE_S3TC", FALSE);
+ switch (format) {
+ case PIPE_FORMAT_RGTC1_SNORM:
+ word4 |= sign_bit[0];
+ case PIPE_FORMAT_RGTC1_UNORM:
+ result = FMT_BC4;
+ goto out_word4;
+ case PIPE_FORMAT_RGTC2_SNORM:
+ word4 |= sign_bit[0] | sign_bit[1];
+ case PIPE_FORMAT_RGTC2_UNORM:
+ result = FMT_BC5;
+ goto out_word4;
+ default:
+ goto out_unknown;
+ }
+ }
+
+ if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
if (!r600_enable_s3tc)
goto out_unknown;
+ if (!util_format_s3tc_enabled) {
+ goto out_unknown;
+ }
+
switch (format) {
case PIPE_FORMAT_DXT1_RGB:
case PIPE_FORMAT_DXT1_RGBA:
+ case PIPE_FORMAT_DXT1_SRGB:
+ case PIPE_FORMAT_DXT1_SRGBA:
result = FMT_BC1;
goto out_word4;
case PIPE_FORMAT_DXT3_RGBA:
+ case PIPE_FORMAT_DXT3_SRGBA:
result = FMT_BC2;
goto out_word4;
case PIPE_FORMAT_DXT5_RGBA:
+ case PIPE_FORMAT_DXT5_SRGBA:
result = FMT_BC3;
goto out_word4;
default:
@@ -889,7 +927,7 @@ uint32_t r600_translate_texformat(enum pipe_format format,
}
- for (i = 0; i < desc->nr_channels; i++) {
+ for (i = 0; i < desc->nr_channels; i++) {
if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
word4 |= sign_bit[i];
}
@@ -897,13 +935,11 @@ uint32_t r600_translate_texformat(enum pipe_format format,
/* R8G8Bx_SNORM - TODO CxV8U8 */
- /* RGTC - TODO */
-
/* See whether the components are of the same size. */
for (i = 1; i < desc->nr_channels; i++) {
uniform = uniform && desc->channel[0].size == desc->channel[i].size;
}
-
+
/* Non-uniform formats. */
if (!uniform) {
switch(desc->nr_channels) {
@@ -927,7 +963,7 @@ uint32_t r600_translate_texformat(enum pipe_format format,
desc->channel[1].size == 10 &&
desc->channel[2].size == 10 &&
desc->channel[3].size == 2) {
- result = FMT_10_10_10_2;
+ result = FMT_2_10_10_10;
goto out_word4;
}
goto out_unknown;
@@ -990,6 +1026,19 @@ uint32_t r600_translate_texformat(enum pipe_format format,
result = FMT_16_16_16_16;
goto out_word4;
}
+ goto out_unknown;
+ case 32:
+ switch (desc->nr_channels) {
+ case 1:
+ result = FMT_32;
+ goto out_word4;
+ case 2:
+ result = FMT_32_32;
+ goto out_word4;
+ case 4:
+ result = FMT_32_32_32_32;
+ goto out_word4;
+ }
}
goto out_unknown;
@@ -1021,7 +1070,7 @@ uint32_t r600_translate_texformat(enum pipe_format format,
goto out_word4;
}
}
-
+
}
out_word4:
if (word4_p)
diff --git a/src/gallium/drivers/r600/r600_translate.c b/src/gallium/drivers/r600/r600_translate.c
index 1c227d3215..7482d15e12 100644
--- a/src/gallium/drivers/r600/r600_translate.c
+++ b/src/gallium/drivers/r600/r600_translate.c
@@ -22,181 +22,34 @@
*
* Authors: Dave Airlie <airlied@redhat.com>
*/
-#include "translate/translate_cache.h"
-#include "translate/translate.h"
-#include <pipebuffer/pb_buffer.h>
+
#include <util/u_index_modify.h>
+#include "util/u_inlines.h"
+#include "util/u_upload_mgr.h"
#include "r600_pipe.h"
-void r600_begin_vertex_translate(struct r600_pipe_context *rctx)
-{
- struct pipe_context *pipe = &rctx->context;
- struct translate_key key = {0};
- struct translate_element *te;
- unsigned tr_elem_index[PIPE_MAX_ATTRIBS] = {0};
- struct translate *tr;
- struct r600_vertex_element *ve = rctx->vertex_elements;
- boolean vb_translated[PIPE_MAX_ATTRIBS] = {0};
- void *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map;
- struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer;
- struct pipe_resource *out_buffer;
- unsigned i, num_verts;
- struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS];
-
- /* Initialize the translate key, i.e. the recipe how vertices should be
- * translated. */
- for (i = 0; i < ve->count; i++) {
- struct pipe_vertex_buffer *vb =
- &rctx->vertex_buffer[ve->elements[i].vertex_buffer_index];
- enum pipe_format output_format = ve->hw_format[i];
- unsigned output_format_size = ve->hw_format_size[i];
-
- /* Check for support. */
- if (ve->elements[i].src_format == ve->hw_format[i]) {
- continue;
- }
-
- /* Workaround for translate: output floats instead of halfs. */
- switch (output_format) {
- case PIPE_FORMAT_R16_FLOAT:
- output_format = PIPE_FORMAT_R32_FLOAT;
- output_format_size = 4;
- break;
- case PIPE_FORMAT_R16G16_FLOAT:
- output_format = PIPE_FORMAT_R32G32_FLOAT;
- output_format_size = 8;
- break;
- case PIPE_FORMAT_R16G16B16_FLOAT:
- output_format = PIPE_FORMAT_R32G32B32_FLOAT;
- output_format_size = 12;
- break;
- case PIPE_FORMAT_R16G16B16A16_FLOAT:
- output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- output_format_size = 16;
- break;
- default:;
- }
-
- /* Add this vertex element. */
- te = &key.element[key.nr_elements];
- /*te->type;
- te->instance_divisor;*/
- te->input_buffer = ve->elements[i].vertex_buffer_index;
- te->input_format = ve->elements[i].src_format;
- te->input_offset = vb->buffer_offset + ve->elements[i].src_offset;
- te->output_format = output_format;
- te->output_offset = key.output_stride;
-
- key.output_stride += output_format_size;
- vb_translated[ve->elements[i].vertex_buffer_index] = TRUE;
- tr_elem_index[i] = key.nr_elements;
- key.nr_elements++;
- }
-
- /* Get a translate object. */
- tr = translate_cache_find(rctx->tran.translate_cache, &key);
-
- /* Map buffers we want to translate. */
- for (i = 0; i < rctx->nvertex_buffer; i++) {
- if (vb_translated[i]) {
- struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i];
-
- vb_map[i] = pipe_buffer_map(pipe, vb->buffer,
- PIPE_TRANSFER_READ, &vb_transfer[i]);
-
- tr->set_buffer(tr, i, vb_map[i], vb->stride, vb->max_index);
- }
- }
-
- /* Create and map the output buffer. */
- num_verts = rctx->vb_max_index + 1;
-
- out_buffer = pipe_buffer_create(&rctx->screen->screen,
- PIPE_BIND_VERTEX_BUFFER,
- key.output_stride * num_verts);
-
- out_map = pipe_buffer_map(pipe, out_buffer, PIPE_TRANSFER_WRITE,
- &out_transfer);
-
- /* Translate. */
- tr->run(tr, 0, num_verts, 0, out_map);
-
- /* Unmap all buffers. */
- for (i = 0; i < rctx->nvertex_buffer; i++) {
- if (vb_translated[i]) {
- pipe_buffer_unmap(pipe, rctx->vertex_buffer[i].buffer,
- vb_transfer[i]);
- }
- }
-
- pipe_buffer_unmap(pipe, out_buffer, out_transfer);
-
- /* Setup the new vertex buffer in the first free slot. */
- for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
- struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i];
-
- if (!vb->buffer) {
- pipe_resource_reference(&vb->buffer, out_buffer);
- vb->buffer_offset = 0;
- vb->max_index = num_verts - 1;
- vb->stride = key.output_stride;
- rctx->tran.vb_slot = i;
- break;
- }
- }
-
- /* Save and replace vertex elements. */
- for (i = 0; i < ve->count; i++) {
- if (vb_translated[ve->elements[i].vertex_buffer_index]) {
- te = &key.element[tr_elem_index[i]];
- new_velems[i].instance_divisor = ve->elements[i].instance_divisor;
- new_velems[i].src_format = te->output_format;
- new_velems[i].src_offset = te->output_offset;
- new_velems[i].vertex_buffer_index = rctx->tran.vb_slot;
- } else {
- memcpy(&new_velems[i], &ve->elements[i],
- sizeof(struct pipe_vertex_element));
- }
- }
-
- rctx->tran.new_velems = pipe->create_vertex_elements_state(pipe, ve->count, new_velems);
- pipe->bind_vertex_elements_state(pipe, rctx->tran.new_velems);
-
- pipe_resource_reference(&out_buffer, NULL);
-}
-
-void r600_end_vertex_translate(struct r600_pipe_context *rctx)
-{
- struct pipe_context *pipe = &rctx->context;
-
- if (rctx->tran.new_velems == NULL) {
- return;
- }
- /* Restore vertex elements. */
- if (rctx->vertex_elements == rctx->tran.new_velems) {
- pipe->bind_vertex_elements_state(pipe, NULL);
- }
- pipe->delete_vertex_elements_state(pipe, rctx->tran.new_velems);
- rctx->tran.new_velems = NULL;
-
- /* Delete the now-unused VBO. */
- pipe_resource_reference(&rctx->vertex_buffer[rctx->tran.vb_slot].buffer,
- NULL);
-}
void r600_translate_index_buffer(struct r600_pipe_context *r600,
- struct pipe_resource **index_buffer,
- unsigned *index_size,
- unsigned *start, unsigned count)
+ struct pipe_resource **index_buffer,
+ unsigned *index_size,
+ unsigned *start, unsigned count)
{
+ struct pipe_resource *out_buffer = NULL;
+ unsigned out_offset;
+ void *ptr;
+ boolean flushed;
+
switch (*index_size) {
case 1:
- util_shorten_ubyte_elts(&r600->context, index_buffer, 0, *start, count);
+ u_upload_alloc(r600->vbuf_mgr->uploader, 0, count * 2,
+ &out_offset, &out_buffer, &flushed, &ptr);
+
+ util_shorten_ubyte_elts_to_userptr(
+ &r600->context, *index_buffer, 0, *start, count, ptr);
+
+ pipe_resource_reference(index_buffer, out_buffer);
*index_size = 2;
- *start = 0;
- break;
- case 2:
- case 4:
+ *start = out_offset / 2;
break;
}
}
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index ae19bfb828..df70e2889e 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -67,6 +67,10 @@
#define PKT3_SET_CTL_CONST 0x6F
#define PKT3_SURFACE_BASE_UPDATE 0x73
+#define PREDICATION_OP_CLEAR 0x0
+#define PREDICATION_OP_ZPASS 0x1
+#define PREDICATION_OP_PRIMCOUNT 0x2
+
#define PKT_TYPE_S(x) (((x) & 0x3) << 30)
#define PKT_TYPE_G(x) (((x) >> 30) & 0x3)
#define PKT_TYPE_C 0x3FFFFFFF
@@ -248,6 +252,8 @@
#define S_0280A0_SOURCE_FORMAT(x) (((x) & 0x1) << 27)
#define G_0280A0_SOURCE_FORMAT(x) (((x) >> 27) & 0x1)
#define C_0280A0_SOURCE_FORMAT 0xF7FFFFFF
+#define V_0280A0_EXPORT_FULL 0
+#define V_0280A0_EXPORT_NORM 1
#define R_028060_CB_COLOR0_SIZE 0x028060
#define S_028060_PITCH_TILE_MAX(x) (((x) & 0x3FF) << 0)
#define G_028060_PITCH_TILE_MAX(x) (((x) >> 0) & 0x3FF)
@@ -960,8 +966,8 @@
#define S_038010_SRF_MODE_ALL(x) (((x) & 0x1) << 10)
#define G_038010_SRF_MODE_ALL(x) (((x) >> 10) & 0x1)
#define C_038010_SRF_MODE_ALL 0xFFFFFBFF
-#define V_038010_SFR_MODE_ZERO_CLAMP_MINUS_ONE 0x00000000
-#define V_038010_SFR_MODE_NO_ZERO 0x00000001
+#define V_038010_SRF_MODE_ZERO_CLAMP_MINUS_ONE 0x00000000
+#define V_038010_SRF_MODE_NO_ZERO 0x00000001
#define S_038010_FORCE_DEGAMMA(x) (((x) & 0x1) << 11)
#define G_038010_FORCE_DEGAMMA(x) (((x) >> 11) & 0x1)
#define C_038010_FORCE_DEGAMMA 0xFFFFF7FF
@@ -2332,31 +2338,6 @@
#define R_0280D4_CB_COLOR5_TILE 0x0280D4
#define R_0280D8_CB_COLOR6_TILE 0x0280D8
#define R_0280DC_CB_COLOR7_TILE 0x0280DC
-#define R_028808_CB_COLOR_CONTROL 0x028808
-#define S_028808_FOG_ENABLE(x) (((x) & 0x1) << 0)
-#define G_028808_FOG_ENABLE(x) (((x) >> 0) & 0x1)
-#define C_028808_FOG_ENABLE 0xFFFFFFFE
-#define S_028808_MULTIWRITE_ENABLE(x) (((x) & 0x1) << 1)
-#define G_028808_MULTIWRITE_ENABLE(x) (((x) >> 1) & 0x1)
-#define C_028808_MULTIWRITE_ENABLE 0xFFFFFFFD
-#define S_028808_DITHER_ENABLE(x) (((x) & 0x1) << 2)
-#define G_028808_DITHER_ENABLE(x) (((x) >> 2) & 0x1)
-#define C_028808_DITHER_ENABLE 0xFFFFFFFB
-#define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3)
-#define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1)
-#define C_028808_DEGAMMA_ENABLE 0xFFFFFFF7
-#define S_028808_SPECIAL_OP(x) (((x) & 0x7) << 4)
-#define G_028808_SPECIAL_OP(x) (((x) >> 4) & 0x7)
-#define C_028808_SPECIAL_OP 0xFFFFFF8F
-#define S_028808_PER_MRT_BLEND(x) (((x) & 0x1) << 7)
-#define G_028808_PER_MRT_BLEND(x) (((x) >> 7) & 0x1)
-#define C_028808_PER_MRT_BLEND 0xFFFFFF7F
-#define S_028808_TARGET_BLEND_ENABLE(x) (((x) & 0xFF) << 8)
-#define G_028808_TARGET_BLEND_ENABLE(x) (((x) >> 8) & 0xFF)
-#define C_028808_TARGET_BLEND_ENABLE 0xFFFF00FF
-#define S_028808_ROP3(x) (((x) & 0xFF) << 16)
-#define G_028808_ROP3(x) (((x) >> 16) & 0xFF)
-#define C_028808_ROP3 0xFF00FFFF
#define R_028614_SPI_VS_OUT_ID_0 0x028614
#define S_028614_SEMANTIC_0(x) (((x) & 0xFF) << 0)
#define G_028614_SEMANTIC_0(x) (((x) >> 0) & 0xFF)
diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c
index 892dee86ba..b3c7d1494f 100644
--- a/src/gallium/drivers/r600/r700_asm.c
+++ b/src/gallium/drivers/r600/r700_asm.c
@@ -26,11 +26,18 @@
#include "r600_asm.h"
#include "r700_sq.h"
+void r700_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf)
+{
+ unsigned count = (cf->ndw / 4) - 1;
+ *bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
+ *bytecode++ = S_SQ_CF_WORD1_CF_INST(cf->inst) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(count) |
+ S_SQ_CF_WORD1_COUNT_3(count >> 3);
+}
int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
{
- unsigned i;
-
bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
@@ -61,18 +68,11 @@ int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
+ S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) |
S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) |
S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) |
S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |
S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate);
}
- if (alu->last) {
- if (alu->nliteral && !alu->literal_added) {
- R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst);
- }
- for (i = 0; i < alu->nliteral; i++) {
- bc->bytecode[id++] = alu->value[i];
- }
- }
return 0;
}