summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/r600/r600_asm.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/r600/r600_asm.c')
-rw-r--r--src/gallium/drivers/r600/r600_asm.c317
1 files changed, 317 insertions, 0 deletions
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 73daa00080..e13c606434 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -22,12 +22,15 @@
*/
#include <stdio.h>
#include <errno.h>
+#include "util/u_format.h"
#include "util/u_memory.h"
#include "pipe/p_shader_tokens.h"
#include "r600_pipe.h"
#include "r600_sq.h"
#include "r600_opcodes.h"
#include "r600_asm.h"
+#include "r600_formats.h"
+#include "r600d.h"
static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu)
{
@@ -972,3 +975,317 @@ void r600_bc_dump(struct r600_bc *bc)
}
fprintf(stderr, "--------------------------------------\n");
}
+
+void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
+{
+ struct r600_pipe_state *rstate;
+ unsigned i = 0;
+
+ if (count > 8) {
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(8 - 1);
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(count - 8 - 1);
+ } else {
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(count - 1);
+ }
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
+ S_SQ_CF_WORD1_BARRIER(1);
+
+ rstate = &ve->rstate;
+ rstate->id = R600_PIPE_STATE_FETCH_SHADER;
+ rstate->nregs = 0;
+ r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS,
+ r600_bo_offset(ve->fetch_shader) >> 8,
+ 0xFFFFFFFF, ve->fetch_shader);
+}
+
+void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
+{
+ struct r600_pipe_state *rstate;
+ unsigned i = 0;
+
+ if (count > 8) {
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(8 - 1);
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT((count - 8) - 1);
+ } else {
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(count - 1);
+ }
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
+ S_SQ_CF_WORD1_BARRIER(1);
+
+ rstate = &ve->rstate;
+ rstate->id = R600_PIPE_STATE_FETCH_SHADER;
+ rstate->nregs = 0;
+ r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS,
+ r600_bo_offset(ve->fetch_shader) >> 8,
+ 0xFFFFFFFF, ve->fetch_shader);
+}
+
+static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
+ unsigned *num_format, unsigned *format_comp)
+{
+ const struct util_format_description *desc;
+ unsigned i;
+
+ *format = 0;
+ *num_format = 0;
+ *format_comp = 0;
+
+ desc = util_format_description(pformat);
+ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
+ goto out_unknown;
+ }
+
+ /* Find the first non-VOID channel. */
+ for (i = 0; i < 4; i++) {
+ if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+ break;
+ }
+ }
+
+ switch (desc->channel[i].type) {
+ /* Half-floats, floats, doubles */
+ case UTIL_FORMAT_TYPE_FLOAT:
+ switch (desc->channel[i].size) {
+ case 16:
+ switch (desc->nr_channels) {
+ case 1:
+ *format = FMT_16_FLOAT;
+ break;
+ case 2:
+ *format = FMT_16_16_FLOAT;
+ break;
+ case 3:
+ *format = FMT_16_16_16_FLOAT;
+ break;
+ case 4:
+ *format = FMT_16_16_16_16_FLOAT;
+ break;
+ }
+ break;
+ case 32:
+ switch (desc->nr_channels) {
+ case 1:
+ *format = FMT_32_FLOAT;
+ break;
+ case 2:
+ *format = FMT_32_32_FLOAT;
+ break;
+ case 3:
+ *format = FMT_32_32_32_FLOAT;
+ break;
+ case 4:
+ *format = FMT_32_32_32_32_FLOAT;
+ break;
+ }
+ break;
+ default:
+ goto out_unknown;
+ }
+ break;
+ /* Unsigned ints */
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ /* Signed ints */
+ case UTIL_FORMAT_TYPE_SIGNED:
+ switch (desc->channel[i].size) {
+ case 8:
+ switch (desc->nr_channels) {
+ case 1:
+ *format = FMT_8;
+ break;
+ case 2:
+ *format = FMT_8_8;
+ break;
+ case 3:
+ // *format = FMT_8_8_8; /* fails piglit draw-vertices test */
+ // break;
+ case 4:
+ *format = FMT_8_8_8_8;
+ break;
+ }
+ break;
+ case 16:
+ switch (desc->nr_channels) {
+ case 1:
+ *format = FMT_16;
+ break;
+ case 2:
+ *format = FMT_16_16;
+ break;
+ case 3:
+ // *format = FMT_16_16_16; /* fails piglit draw-vertices test */
+ // break;
+ case 4:
+ *format = FMT_16_16_16_16;
+ break;
+ }
+ break;
+ case 32:
+ switch (desc->nr_channels) {
+ case 1:
+ *format = FMT_32;
+ break;
+ case 2:
+ *format = FMT_32_32;
+ break;
+ case 3:
+ *format = FMT_32_32_32;
+ break;
+ case 4:
+ *format = FMT_32_32_32_32;
+ break;
+ }
+ break;
+ default:
+ goto out_unknown;
+ }
+ break;
+ default:
+ goto out_unknown;
+ }
+
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
+ *format_comp = 1;
+ }
+ if (desc->channel[i].normalized) {
+ *num_format = 0;
+ } else {
+ *num_format = 2;
+ }
+ return;
+out_unknown:
+ R600_ERR("unsupported vertex format %s\n", util_format_name(pformat));
+}
+
+void r600_bc(unsigned ndw, unsigned chiprev, u32 *bytecode)
+{
+ unsigned i;
+ char chip = '6';
+
+ switch (chiprev) {
+ case 1:
+ chip = '7';
+ break;
+ case 2:
+ chip = 'E';
+ break;
+ case 0:
+ default:
+ chip = '6';
+ break;
+ }
+ fprintf(stderr, "bytecode %d dw -----------------------\n", ndw);
+ fprintf(stderr, " %c\n", chip);
+ for (i = 0; i < ndw; i++) {
+ fprintf(stderr, "0x%08X\n", bytecode[i]);
+ }
+ fprintf(stderr, "--------------------------------------\n");
+}
+
+int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve)
+{
+ unsigned ndw, i;
+ u32 *bytecode;
+ unsigned fetch_resource_start = 0, format, num_format, format_comp;
+ struct pipe_vertex_element *elements = ve->elements;
+ const struct util_format_description *desc;
+
+ /* 2 dwords for cf aligned to 4 + 4 dwords per input */
+ ndw = 8 + ve->count * 4;
+ ve->fs_size = ndw * 4;
+
+ /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
+ ve->fetch_shader = r600_bo(rctx->radeon, ndw*4, 256, PIPE_BIND_VERTEX_BUFFER, 0);
+ if (ve->fetch_shader == NULL) {
+ return -ENOMEM;
+ }
+
+ bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL);
+ if (bytecode == NULL) {
+ r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
+ return -ENOMEM;
+ }
+
+ if (rctx->family >= CHIP_CEDAR) {
+ eg_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4);
+ } else {
+ r600_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4);
+ fetch_resource_start = 160;
+ }
+
+ /* vertex elements offset need special handling, if offset is bigger
+ * than what we can put in fetch instruction then we need to alterate
+ * the vertex resource offset. In such case in order to simplify code
+ * we will bound one resource per elements. It's a worst case scenario.
+ */
+ for (i = 0; i < ve->count; i++) {
+ ve->vbuffer_offset[i] = C_SQ_VTX_WORD2_OFFSET & elements[i].src_offset;
+ if (ve->vbuffer_offset[i]) {
+ ve->vbuffer_need_offset = 1;
+ }
+ }
+
+ for (i = 0; i < ve->count; i++) {
+ unsigned vbuffer_index;
+ r600_vertex_data_type(ve->hw_format[i], &format, &num_format, &format_comp);
+ desc = util_format_description(ve->hw_format[i]);
+ if (desc == NULL) {
+ R600_ERR("unknown format %d\n", ve->hw_format[i]);
+ r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
+ return -EINVAL;
+ }
+
+ /* see above for vbuffer_need_offset explanation */
+ vbuffer_index = elements[i].vertex_buffer_index;
+ if (ve->vbuffer_need_offset) {
+ bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(i + fetch_resource_start);
+ } else {
+ bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(vbuffer_index + fetch_resource_start);
+ }
+ bytecode[8 + i * 4 + 0] |= S_SQ_VTX_WORD0_SRC_GPR(0) |
+ S_SQ_VTX_WORD0_SRC_SEL_X(0) |
+ S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(0x1F);
+ bytecode[8 + i * 4 + 1] = S_SQ_VTX_WORD1_DST_SEL_X(desc->swizzle[0]) |
+ S_SQ_VTX_WORD1_DST_SEL_Y(desc->swizzle[1]) |
+ S_SQ_VTX_WORD1_DST_SEL_Z(desc->swizzle[2]) |
+ S_SQ_VTX_WORD1_DST_SEL_W(desc->swizzle[3]) |
+ S_SQ_VTX_WORD1_USE_CONST_FIELDS(0) |
+ S_SQ_VTX_WORD1_DATA_FORMAT(format) |
+ S_SQ_VTX_WORD1_NUM_FORMAT_ALL(num_format) |
+ S_SQ_VTX_WORD1_FORMAT_COMP_ALL(format_comp) |
+ S_SQ_VTX_WORD1_SRF_MODE_ALL(1) |
+ S_SQ_VTX_WORD1_GPR_DST_GPR(i + 1);
+ bytecode[8 + i * 4 + 2] = S_SQ_VTX_WORD2_OFFSET(elements[i].src_offset) |
+ S_SQ_VTX_WORD2_MEGA_FETCH(1);
+ bytecode[8 + i * 4 + 3] = 0;
+ }
+ r600_bo_unmap(rctx->radeon, ve->fetch_shader);
+ return 0;
+}