From de553d906b4a205d811a9e1651f14212ec284e29 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 23 Jul 2010 17:32:32 -0400 Subject: r600g: drop compiler stuff and switch over dumb tgsi assembler Writing a compiler is time consuming and error prone in order to allow r600g to further progress in the meantime i wrote a simple tgsi assembler, it does stupid thing but i would rather keep the code simple than having people trying to optimize code it does. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/Makefile | 7 +- src/gallium/drivers/r600/r600_asm.c | 385 ++++++++++ src/gallium/drivers/r600/r600_asm.h | 112 +++ src/gallium/drivers/r600/r600_compiler.c | 447 ------------ src/gallium/drivers/r600/r600_compiler.h | 320 --------- src/gallium/drivers/r600/r600_compiler_dump.c | 267 ------- src/gallium/drivers/r600/r600_compiler_r600.c | 972 -------------------------- src/gallium/drivers/r600/r600_compiler_r700.c | 233 ------ src/gallium/drivers/r600/r600_compiler_tgsi.c | 730 ------------------- src/gallium/drivers/r600/r600_context.c | 2 +- src/gallium/drivers/r600/r600_context.h | 5 +- src/gallium/drivers/r600/r600_shader.c | 784 ++++++++++++++++++--- src/gallium/drivers/r600/r600_shader.h | 244 +------ src/gallium/drivers/r600/r600_sq.h | 18 +- src/gallium/drivers/r600/r600_state.c | 4 +- src/gallium/drivers/r600/r700_asm.c | 70 ++ 16 files changed, 1277 insertions(+), 3323 deletions(-) create mode 100644 src/gallium/drivers/r600/r600_asm.c create mode 100644 src/gallium/drivers/r600/r600_asm.h delete mode 100644 src/gallium/drivers/r600/r600_compiler.c delete mode 100644 src/gallium/drivers/r600/r600_compiler.h delete mode 100644 src/gallium/drivers/r600/r600_compiler_dump.c delete mode 100644 src/gallium/drivers/r600/r600_compiler_r600.c delete mode 100644 src/gallium/drivers/r600/r600_compiler_r700.c delete mode 100644 src/gallium/drivers/r600/r600_compiler_tgsi.c create mode 100644 src/gallium/drivers/r600/r700_asm.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile index aae31a6a6e..8f1e1366b5 100644 --- a/src/gallium/drivers/r600/Makefile +++ b/src/gallium/drivers/r600/Makefile @@ -18,10 +18,7 @@ C_SOURCES = \ r600_state.c \ r600_texture.c \ r600_shader.c \ - r600_compiler.c \ - r600_compiler_tgsi.c \ - r600_compiler_dump.c \ - r600_compiler_r600.c \ - r600_compiler_r700.c + r600_asm.c \ + r700_asm.c include ../../Makefile.template diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c new file mode 100644 index 0000000000..6e48703a57 --- /dev/null +++ b/src/gallium/drivers/r600/r600_asm.c @@ -0,0 +1,385 @@ +/* + * Copyright 2010 Jerome Glisse + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "r600_asm.h" +#include "r600_context.h" +#include "util/u_memory.h" +#include "r600_sq.h" +#include +#include + +int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id); + +static struct r600_bc_cf *r600_bc_cf(void) +{ + struct r600_bc_cf *cf = CALLOC_STRUCT(r600_bc_cf); + + if (cf == NULL) + return NULL; + LIST_INITHEAD(&cf->list); + LIST_INITHEAD(&cf->alu); + LIST_INITHEAD(&cf->vtx); + return cf; +} + +static struct r600_bc_alu *r600_bc_alu(void) +{ + struct r600_bc_alu *alu = CALLOC_STRUCT(r600_bc_alu); + + if (alu == NULL) + return NULL; + LIST_INITHEAD(&alu->list); + return alu; +} + +static struct r600_bc_vtx *r600_bc_vtx(void) +{ + struct r600_bc_vtx *vtx = CALLOC_STRUCT(r600_bc_vtx); + + if (vtx == NULL) + return NULL; + LIST_INITHEAD(&vtx->list); + return vtx; +} + +int r600_bc_init(struct r600_bc *bc, enum radeon_family family) +{ + LIST_INITHEAD(&bc->cf); + bc->family = family; + return 0; +} + +static int r600_bc_add_cf(struct r600_bc *bc) +{ + struct r600_bc_cf *cf = r600_bc_cf(); + + if (cf == NULL) + return -ENOMEM; + LIST_ADDTAIL(&cf->list, &bc->cf); + if (bc->cf_last) + cf->id = bc->cf_last->id + 2; + bc->cf_last = cf; + bc->ncf++; + bc->ndw += 2; + return 0; +} + +int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output) +{ + int r; + + r = r600_bc_add_cf(bc); + if (r) + return r; + bc->cf_last->inst = output->inst; + memcpy(&bc->cf_last->output, output, sizeof(struct r600_bc_output)); + return 0; +} + +int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) +{ + struct r600_bc_alu *nalu = r600_bc_alu(); + struct r600_bc_alu *lalu; + int i, r; + + if (nalu == NULL) + return -ENOMEM; + memcpy(nalu, alu, sizeof(struct r600_bc_alu)); + nalu->nliteral = 0; + + /* cf can contains only alu or only vtx or only tex */ + if (bc->cf_last == NULL || bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) { + r = r600_bc_add_cf(bc); + if (r) { + free(nalu); + return r; + } + bc->cf_last->inst = V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3; + } + /* number of gpr == the last gpr used in any alu */ + for (i = 0; i < 3; i++) { + if (alu->src[i].sel >= bc->ngpr && alu->src[i].sel < 128) { + bc->ngpr = alu->src[i].sel + 1; + } + /* compute how many literal are needed + * either 2 or 4 literals + */ + if (alu->src[i].sel == 253) { + if (((alu->src[i].chan + 2) & 0x6) > nalu->nliteral) { + nalu->nliteral = (alu->src[i].chan + 2) & 0x6; + } + } + } + if (!LIST_IS_EMPTY(&bc->cf_last->alu)) { + lalu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list); + if (!lalu->last && lalu->nliteral > nalu->nliteral) { + nalu->nliteral = lalu->nliteral; + } + } + if (alu->dst.sel >= bc->ngpr) { + bc->ngpr = alu->dst.sel + 1; + } + LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu); + /* each alu use 2 dwords */ + bc->cf_last->ndw += 2; + bc->ndw += 2; + return 0; +} + +int r600_bc_add_literal(struct r600_bc *bc, const u32 *value) +{ + struct r600_bc_alu *alu; + + if (bc->cf_last == NULL || + bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) || + LIST_IS_EMPTY(&bc->cf_last->alu)) { + R600_ERR("last CF is not ALU (%p)\n", bc->cf_last); + return -EINVAL; + } + alu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list); + if (!alu->last || !alu->nliteral) { + return 0; + } + memcpy(alu->value, value, 4 * 4); + bc->cf_last->ndw += alu->nliteral; + bc->ndw += alu->nliteral; + return 0; +} + +int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) +{ + struct r600_bc_vtx *nvtx = r600_bc_vtx(); + int r; + + if (nvtx == NULL) + return -ENOMEM; + memcpy(nvtx, vtx, sizeof(struct r600_bc_vtx)); + + /* cf can contains only alu or only vtx or only tex */ + if (bc->cf_last == NULL || + (bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX && + bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC)) { + r = r600_bc_add_cf(bc); + if (r) { + free(nvtx); + return r; + } + bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_VTX; + } + LIST_ADDTAIL(&nvtx->list, &bc->cf_last->vtx); + /* each fetch use 6 dwords */ + bc->cf_last->ndw += 4; + bc->ndw += 4; + return 0; +} + +int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id) +{ + bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | + S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | + S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x) | + S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count); + bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) | + S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) | + S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) | + S_SQ_VTX_WORD1_DST_SEL_W(vtx->dst_sel_w) | + S_SQ_VTX_WORD1_USE_CONST_FIELDS(1) | + S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr); + bc->bytecode[id++] = S_SQ_VTX_WORD2_MEGA_FETCH(1); + bc->bytecode[id++] = 0; + return 0; +} + +int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) +{ + unsigned i; + + /* don't replace gpr by pv or ps for destination register */ + if (alu->is_op3) { + bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | + S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | + S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | + S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | + S_SQ_ALU_WORD0_LAST(alu->last); + bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | + S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | + S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) | + S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) | + S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) | + S_SQ_ALU_WORD1_OP3_ALU_INST(alu->inst) | + S_SQ_ALU_WORD1_BANK_SWIZZLE(0); + } else { + bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | + S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | + S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) | + S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | + S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | + S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) | + S_SQ_ALU_WORD0_LAST(alu->last); + bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | + S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | + S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | + S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | + S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | + S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) | + S_SQ_ALU_WORD1_BANK_SWIZZLE(0); + } + if (alu->last) { + for (i = 0; i < alu->nliteral; i++) { + bc->bytecode[id++] = alu->value[i]; + } + } + return 0; +} + +int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) +{ + unsigned id = cf->id; + + switch (cf->inst) { + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1); + bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) | + S_SQ_CF_ALU_WORD1_BARRIER(1) | + S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); + break; + case V_SQ_CF_WORD1_SQ_CF_INST_VTX: + case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: + bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); + bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1); + break; + case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: + case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: + bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) | + S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) | + S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) | + S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type); + bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) | + S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) | + S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) | + S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) | + S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) | + S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst) | + S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program); + break; + default: + R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); + return -EINVAL; + } + return 0; +} + +int r600_bc_build(struct r600_bc *bc) +{ + struct r600_bc_cf *cf; + struct r600_bc_alu *alu; + struct r600_bc_vtx *vtx; + unsigned addr; + int r; + + + /* first path compute addr of each CF block */ + /* addr start after all the CF instructions */ + addr = bc->cf_last->id + 2; + LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { + switch (cf->inst) { + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + break; + case V_SQ_CF_WORD1_SQ_CF_INST_VTX: + case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: + /* fetch node need to be 16 bytes aligned*/ + addr += 3; + addr &= 0xFFFFFFFCUL; + break; + case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: + case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: + break; + default: + R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); + return -EINVAL; + } + cf->addr = addr; + addr += cf->ndw; + bc->ndw = cf->addr + cf->ndw; + } + free(bc->bytecode); + bc->bytecode = calloc(1, bc->ndw * 4); + if (bc->bytecode == NULL) + return -ENOMEM; + LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { + addr = cf->addr; + r = r600_bc_cf_build(bc, cf); + if (r) + return r; + switch (cf->inst) { + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { + switch (bc->family) { + case CHIP_R600: + case CHIP_RV610: + case CHIP_RV630: + case CHIP_RV670: + case CHIP_RV620: + case CHIP_RV635: + case CHIP_RS780: + case CHIP_RS880: + r = r600_bc_alu_build(bc, alu, addr); + break; + case CHIP_RV770: + case CHIP_RV730: + case CHIP_RV710: + case CHIP_RV740: + r = r700_bc_alu_build(bc, alu, addr); + break; + default: + R600_ERR("unknown family %d\n", bc->family); + return -EINVAL; + } + if (r) + return r; + addr += 2; + if (alu->last) { + addr += alu->nliteral; + } + } + break; + case V_SQ_CF_WORD1_SQ_CF_INST_VTX: + case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: + LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { + r = r600_bc_vtx_build(bc, vtx, addr); + if (r) + return r; + addr += 4; + } + break; + case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: + case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: + break; + default: + R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); + return -EINVAL; + } + } + return 0; +} diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h new file mode 100644 index 0000000000..8a874a9df5 --- /dev/null +++ b/src/gallium/drivers/r600/r600_asm.h @@ -0,0 +1,112 @@ +/* + * Copyright 2010 Jerome Glisse + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef R600_ASM_H +#define R600_ASM_H + +#include "radeon.h" +#include "util/u_double_list.h" + +struct r600_bc_alu_src { + unsigned sel; + unsigned chan; + unsigned neg; + unsigned abs; +}; + +struct r600_bc_alu_dst { + unsigned sel; + unsigned chan; + unsigned clamp; + unsigned write; +}; + +struct r600_bc_alu { + struct list_head list; + struct r600_bc_alu_src src[3]; + struct r600_bc_alu_dst dst; + unsigned inst; + unsigned last; + unsigned is_op3; + unsigned nliteral; + u32 value[4]; +}; + +struct r600_bc_vtx { + struct list_head list; + unsigned inst; + unsigned fetch_type; + unsigned buffer_id; + unsigned src_gpr; + unsigned src_sel_x; + unsigned mega_fetch_count; + unsigned dst_gpr; + unsigned dst_sel_x; + unsigned dst_sel_y; + unsigned dst_sel_z; + unsigned dst_sel_w; +}; + +struct r600_bc_output { + unsigned array_base; + unsigned type; + unsigned end_of_program; + unsigned inst; + unsigned elem_size; + unsigned gpr; + unsigned swizzle_x; + unsigned swizzle_y; + unsigned swizzle_z; + unsigned swizzle_w; + unsigned barrier; +}; + +struct r600_bc_cf { + struct list_head list; + unsigned inst; + unsigned addr; + unsigned ndw; + unsigned id; + struct list_head alu; + struct list_head vtx; + struct r600_bc_output output; +}; + +struct r600_bc { + enum radeon_family family; + struct list_head cf; + struct r600_bc_cf *cf_last; + unsigned ndw; + unsigned ncf; + unsigned ngpr; + unsigned nresource; + u32 *bytecode; +}; + +int r600_bc_init(struct r600_bc *bc, enum radeon_family family); +int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu); +int r600_bc_add_literal(struct r600_bc *bc, const u32 *value); +int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx); +int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output); +int r600_bc_build(struct r600_bc *bc); + +#endif diff --git a/src/gallium/drivers/r600/r600_compiler.c b/src/gallium/drivers/r600/r600_compiler.c deleted file mode 100644 index 1804b86d24..0000000000 --- a/src/gallium/drivers/r600/r600_compiler.c +++ /dev/null @@ -1,447 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#include -#include -#include -#include -#include -#include "r600_compiler.h" - -struct c_vector *c_vector_new(void) -{ - struct c_vector *v = calloc(1, sizeof(struct c_vector)); - - if (v == NULL) { - return NULL; - } - LIST_INITHEAD(&v->head); - return v; -} - -static unsigned c_opcode_is_alu(unsigned opcode) -{ - switch (opcode) { - case C_OPCODE_MOV: - case C_OPCODE_MUL: - case C_OPCODE_MAD: - case C_OPCODE_ARL: - case C_OPCODE_LIT: - case C_OPCODE_RCP: - case C_OPCODE_RSQ: - case C_OPCODE_EXP: - case C_OPCODE_LOG: - case C_OPCODE_ADD: - case C_OPCODE_DP3: - case C_OPCODE_DP4: - case C_OPCODE_DST: - case C_OPCODE_MIN: - case C_OPCODE_MAX: - case C_OPCODE_SLT: - case C_OPCODE_SGE: - case C_OPCODE_SUB: - case C_OPCODE_LRP: - case C_OPCODE_CND: - case C_OPCODE_DP2A: - case C_OPCODE_FRC: - case C_OPCODE_CLAMP: - case C_OPCODE_FLR: - case C_OPCODE_ROUND: - case C_OPCODE_EX2: - case C_OPCODE_LG2: - case C_OPCODE_POW: - case C_OPCODE_XPD: - case C_OPCODE_ABS: - case C_OPCODE_RCC: - case C_OPCODE_DPH: - case C_OPCODE_COS: - case C_OPCODE_DDX: - case C_OPCODE_DDY: - case C_OPCODE_PK2H: - case C_OPCODE_PK2US: - case C_OPCODE_PK4B: - case C_OPCODE_PK4UB: - case C_OPCODE_RFL: - case C_OPCODE_SEQ: - case C_OPCODE_SFL: - case C_OPCODE_SGT: - case C_OPCODE_SIN: - case C_OPCODE_SLE: - case C_OPCODE_SNE: - case C_OPCODE_STR: - case C_OPCODE_UP2H: - case C_OPCODE_UP2US: - case C_OPCODE_UP4B: - case C_OPCODE_UP4UB: - case C_OPCODE_X2D: - case C_OPCODE_ARA: - case C_OPCODE_ARR: - case C_OPCODE_BRA: - case C_OPCODE_SSG: - case C_OPCODE_CMP: - case C_OPCODE_SCS: - case C_OPCODE_NRM: - case C_OPCODE_DIV: - case C_OPCODE_DP2: - case C_OPCODE_CEIL: - case C_OPCODE_I2F: - case C_OPCODE_NOT: - case C_OPCODE_TRUNC: - case C_OPCODE_SHL: - case C_OPCODE_AND: - case C_OPCODE_OR: - case C_OPCODE_MOD: - case C_OPCODE_XOR: - case C_OPCODE_SAD: - case C_OPCODE_NRM4: - case C_OPCODE_F2I: - case C_OPCODE_IDIV: - case C_OPCODE_IMAX: - case C_OPCODE_IMIN: - case C_OPCODE_INEG: - case C_OPCODE_ISGE: - case C_OPCODE_ISHR: - case C_OPCODE_ISLT: - case C_OPCODE_F2U: - case C_OPCODE_U2F: - case C_OPCODE_UADD: - case C_OPCODE_UDIV: - case C_OPCODE_UMAD: - case C_OPCODE_UMAX: - case C_OPCODE_UMIN: - case C_OPCODE_UMOD: - case C_OPCODE_UMUL: - case C_OPCODE_USEQ: - case C_OPCODE_USGE: - case C_OPCODE_USHR: - case C_OPCODE_USLT: - case C_OPCODE_USNE: - return 1; - case C_OPCODE_END: - case C_OPCODE_VFETCH: - case C_OPCODE_KILP: - case C_OPCODE_CAL: - case C_OPCODE_RET: - case C_OPCODE_TXB: - case C_OPCODE_TXL: - case C_OPCODE_BRK: - case C_OPCODE_IF: - case C_OPCODE_BGNFOR: - case C_OPCODE_REP: - case C_OPCODE_ELSE: - case C_OPCODE_ENDIF: - case C_OPCODE_ENDFOR: - case C_OPCODE_ENDREP: - case C_OPCODE_PUSHA: - case C_OPCODE_POPA: - case C_OPCODE_TXF: - case C_OPCODE_TXQ: - case C_OPCODE_CONT: - case C_OPCODE_EMIT: - case C_OPCODE_ENDPRIM: - case C_OPCODE_BGNLOOP: - case C_OPCODE_BGNSUB: - case C_OPCODE_ENDLOOP: - case C_OPCODE_ENDSUB: - case C_OPCODE_NOP: - case C_OPCODE_CALLNZ: - case C_OPCODE_IFC: - case C_OPCODE_BREAKC: - case C_OPCODE_KIL: - case C_OPCODE_TEX: - case C_OPCODE_TXD: - case C_OPCODE_TXP: - case C_OPCODE_SWITCH: - case C_OPCODE_CASE: - case C_OPCODE_DEFAULT: - case C_OPCODE_ENDSWITCH: - default: - return 0; - } -} - - -/* NEW */ -void c_node_init(struct c_node *node) -{ - memset(node, 0, sizeof(struct c_node)); - LIST_INITHEAD(&node->predecessors); - LIST_INITHEAD(&node->successors); - LIST_INITHEAD(&node->childs); - LIST_INITHEAD(&node->insts); - node->parent = NULL; -} - -static struct c_node_link *c_node_link_new(struct c_node *node) -{ - struct c_node_link *link; - - link = calloc(1, sizeof(struct c_node_link)); - if (link == NULL) - return NULL; - LIST_INITHEAD(&link->head); - link->node = node; - return link; -} - -int c_node_cfg_link(struct c_node *predecessor, struct c_node *successor) -{ - struct c_node_link *pedge, *sedge; - - pedge = c_node_link_new(successor); - sedge = c_node_link_new(predecessor); - if (sedge == NULL || pedge == NULL) { - free(sedge); - free(pedge); - return -ENOMEM; - } - LIST_ADDTAIL(&pedge->head, &predecessor->successors); - LIST_ADDTAIL(&sedge->head, &successor->predecessors); - - return 0; -} - -int c_node_add_new_instruction_head(struct c_node *node, struct c_instruction *instruction) -{ - struct c_instruction *inst = malloc(sizeof(struct c_instruction)); - - if (inst == NULL) - return -ENOMEM; - memcpy(inst, instruction, sizeof(struct c_instruction)); - LIST_ADD(&inst->head, &node->insts); - return 0; -} - -int c_node_add_new_instruction(struct c_node *node, struct c_instruction *instruction) -{ - struct c_instruction *inst = malloc(sizeof(struct c_instruction)); - - if (inst == NULL) - return -ENOMEM; - memcpy(inst, instruction, sizeof(struct c_instruction)); - LIST_ADDTAIL(&inst->head, &node->insts); - return 0; -} - -struct c_node *c_shader_cfg_new_node_after(struct c_shader *shader, struct c_node *predecessor) -{ - struct c_node *node = calloc(1, sizeof(struct c_node)); - - if (node == NULL) - return NULL; - c_node_init(node); - if (c_node_cfg_link(predecessor, node)) { - free(node); - return NULL; - } - LIST_ADDTAIL(&node->head, &shader->nodes); - return node; -} - -int c_shader_init(struct c_shader *shader, unsigned type) -{ - unsigned i; - int r; - - shader->type = type; - for (i = 0; i < C_FILE_COUNT; i++) { - shader->files[i].nvectors = 0; - LIST_INITHEAD(&shader->files[i].vectors); - } - LIST_INITHEAD(&shader->nodes); - c_node_init(&shader->entry); - c_node_init(&shader->end); - shader->entry.opcode = C_OPCODE_ENTRY; - shader->end.opcode = C_OPCODE_END; - r = c_node_cfg_link(&shader->entry, &shader->end); - if (r) - return r; - return 0; -} - -struct c_vector *c_shader_vector_new(struct c_shader *shader, unsigned file, unsigned name, int sid) -{ - struct c_vector *v = calloc(1, sizeof(struct c_vector)); - int i; - - if (v == NULL) { - return NULL; - } - for (i = 0; i < 4; i++) { - v->channel[i] = calloc(1, sizeof(struct c_channel)); - if (v->channel[i] == NULL) - goto out_err; - v->channel[i]->vindex = i; - v->channel[i]->vector = v; - } - v->file = file; - v->name = name; - v->sid = sid; - shader->files[v->file].nvectors++; - v->id = shader->nvectors++; - LIST_ADDTAIL(&v->head, &shader->files[v->file].vectors); - return v; -out_err: - for (i = 0; i < 4; i++) { - free(v->channel[i]); - } - free(v); - return NULL; -} - -static void c_node_remove_link(struct list_head *head, struct c_node *node) -{ - struct c_node_link *link, *tmp; - - LIST_FOR_EACH_ENTRY_SAFE(link, tmp, head, head) { - if (link->node == node) { - LIST_DEL(&link->head); - free(link); - } - } -} - -static void c_node_destroy(struct c_node *node) -{ - struct c_instruction *i, *ni; - struct c_node_link *link, *tmp; - - LIST_FOR_EACH_ENTRY_SAFE(i, ni, &node->insts, head) { - LIST_DEL(&i->head); - free(i); - } - if (node->parent) - c_node_remove_link(&node->parent->childs, node); - node->parent = NULL; - LIST_FOR_EACH_ENTRY_SAFE(link, tmp, &node->predecessors, head) { - c_node_remove_link(&link->node->successors, node); - LIST_DEL(&link->head); - free(link); - } - LIST_FOR_EACH_ENTRY_SAFE(link, tmp, &node->successors, head) { - c_node_remove_link(&link->node->predecessors, node); - LIST_DEL(&link->head); - free(link); - } - LIST_FOR_EACH_ENTRY_SAFE(link, tmp, &node->childs, head) { - link->node->parent = NULL; - LIST_DEL(&link->head); - free(link); - } -} - -void c_shader_destroy(struct c_shader *shader) -{ - struct c_node *n, *nn; - struct c_vector *v, *nv; - unsigned i; - - for (i = 0; i < C_FILE_COUNT; i++) { - shader->files[i].nvectors = 0; - LIST_FOR_EACH_ENTRY_SAFE(v, nv, &shader->files[i].vectors, head) { - LIST_DEL(&v->head); - free(v->channel[0]); - free(v->channel[1]); - free(v->channel[2]); - free(v->channel[3]); - free(v); - } - } - LIST_FOR_EACH_ENTRY_SAFE(n, nn, &shader->nodes, head) { - LIST_DEL(&n->head); - c_node_destroy(n); - } - memset(shader, 0, sizeof(struct c_shader)); -} - -static void c_shader_dfs_without_rec(struct c_node *entry, struct c_node *node) -{ - struct c_node_link *link; - - if (entry == node || entry->visited) - return; - entry->visited = 1; - LIST_FOR_EACH_ENTRY(link, &entry->successors, head) { - c_shader_dfs_without_rec(link->node, node); - } -} - -static void c_shader_dfs_without(struct c_shader *shader, struct c_node *node) -{ - struct c_node *n; - - shader->entry.visited = 0; - shader->end.visited = 0; - LIST_FOR_EACH_ENTRY(n, &shader->nodes, head) { - n->visited = 0; - } - c_shader_dfs_without_rec(&shader->entry, node); -} - -static int c_shader_build_dominator_tree_rec(struct c_shader *shader, struct c_node *node) -{ - struct c_node_link *link, *nlink; - unsigned found = 0; - int r; - - if (node->done) - return 0; - node->done = 1; - LIST_FOR_EACH_ENTRY(link, &node->predecessors, head) { - /* if we remove this predecessor can we reach the current node ? */ - c_shader_dfs_without(shader, link->node); - if (node->visited == 0) { - /* we were unable to visit current node thus current - * predecessor is the immediate dominator of node, as - * their can be only one immediate dominator we break - */ - node->parent = link->node; - nlink = c_node_link_new(node); - if (nlink == NULL) - return -ENOMEM; - LIST_ADDTAIL(&nlink->head, &link->node->childs); - found = 1; - break; - } - } - /* this shouldn't happen there should at least be 1 denominator for each node */ - if (!found && node->opcode != C_OPCODE_ENTRY) { - fprintf(stderr, "invalid flow control graph node %p (%d) has no immediate dominator\n", - node, node->opcode); - return -EINVAL; - } - LIST_FOR_EACH_ENTRY(link, &node->predecessors, head) { - r = c_shader_build_dominator_tree_rec(shader, link->node); - if (r) - return r; - } - return 0; -} - -int c_shader_build_dominator_tree(struct c_shader *shader) -{ - struct c_node *node; - LIST_FOR_EACH_ENTRY(node, &shader->nodes, head) { - node->done = 0; - } - return c_shader_build_dominator_tree_rec(shader, &shader->end); -} diff --git a/src/gallium/drivers/r600/r600_compiler.h b/src/gallium/drivers/r600/r600_compiler.h deleted file mode 100644 index 77230aed73..0000000000 --- a/src/gallium/drivers/r600/r600_compiler.h +++ /dev/null @@ -1,320 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#ifndef R600_COMPILER_H -#define R600_COMPILER_H - -#include "util/u_double_list.h" - -struct c_vector; - -/* operand are the basic source/destination of each operation */ -struct c_channel { - struct list_head head; - unsigned vindex; /**< index in vector X,Y,Z,W (0,1,2,3) */ - unsigned value; /**< immediate value 32bits */ - struct c_vector *vector; /**< vector to which it belongs */ -}; - -/* in GPU world most of the time operand are grouped into vector - * of 4 component this structure is mostly and handler to group - * operand into a same vector - */ -struct c_vector { - struct list_head head; - unsigned id; /**< vector uniq id */ - unsigned name; /**< semantic name */ - unsigned file; /**< operand file C_FILE_* */ - int sid; /**< semantic id */ - struct c_channel *channel[4]; /**< operands */ -}; - -#define C_PROGRAM_TYPE_VS 0 -#define C_PROGRAM_TYPE_FS 1 -#define C_PROGRAM_TYPE_COUNT 2 - -#define C_NODE_FLAG_ALU 1 -#define C_NODE_FLAG_FETCH 2 - -#define C_SWIZZLE_X 0 -#define C_SWIZZLE_Y 1 -#define C_SWIZZLE_Z 2 -#define C_SWIZZLE_W 3 -#define C_SWIZZLE_0 4 -#define C_SWIZZLE_1 5 -#define C_SWIZZLE_D 6 - -#define C_FILE_NULL 0 -#define C_FILE_CONSTANT 1 -#define C_FILE_INPUT 2 -#define C_FILE_OUTPUT 3 -#define C_FILE_TEMPORARY 4 -#define C_FILE_SAMPLER 5 -#define C_FILE_ADDRESS 6 -#define C_FILE_IMMEDIATE 7 -#define C_FILE_LOOP 8 -#define C_FILE_PREDICATE 9 -#define C_FILE_SYSTEM_VALUE 10 -#define C_FILE_RESOURCE 11 -#define C_FILE_COUNT 12 - -#define C_SEMANTIC_POSITION 0 -#define C_SEMANTIC_COLOR 1 -#define C_SEMANTIC_BCOLOR 2 /**< back-face color */ -#define C_SEMANTIC_FOG 3 -#define C_SEMANTIC_PSIZE 4 -#define C_SEMANTIC_GENERIC 5 -#define C_SEMANTIC_NORMAL 6 -#define C_SEMANTIC_FACE 7 -#define C_SEMANTIC_EDGEFLAG 8 -#define C_SEMANTIC_PRIMID 9 -#define C_SEMANTIC_INSTANCEID 10 -#define C_SEMANTIC_VERTEXID 11 -#define C_SEMANTIC_COUNT 12 /**< number of semantic values */ - -#define C_OPCODE_NOP 0 -#define C_OPCODE_MOV 1 -#define C_OPCODE_LIT 2 -#define C_OPCODE_RCP 3 -#define C_OPCODE_RSQ 4 -#define C_OPCODE_EXP 5 -#define C_OPCODE_LOG 6 -#define C_OPCODE_MUL 7 -#define C_OPCODE_ADD 8 -#define C_OPCODE_DP3 9 -#define C_OPCODE_DP4 10 -#define C_OPCODE_DST 11 -#define C_OPCODE_MIN 12 -#define C_OPCODE_MAX 13 -#define C_OPCODE_SLT 14 -#define C_OPCODE_SGE 15 -#define C_OPCODE_MAD 16 -#define C_OPCODE_SUB 17 -#define C_OPCODE_LRP 18 -#define C_OPCODE_CND 19 -/* gap */ -#define C_OPCODE_DP2A 21 -/* gap */ -#define C_OPCODE_FRC 24 -#define C_OPCODE_CLAMP 25 -#define C_OPCODE_FLR 26 -#define C_OPCODE_ROUND 27 -#define C_OPCODE_EX2 28 -#define C_OPCODE_LG2 29 -#define C_OPCODE_POW 30 -#define C_OPCODE_XPD 31 -/* gap */ -#define C_OPCODE_ABS 33 -#define C_OPCODE_RCC 34 -#define C_OPCODE_DPH 35 -#define C_OPCODE_COS 36 -#define C_OPCODE_DDX 37 -#define C_OPCODE_DDY 38 -#define C_OPCODE_KILP 39 /* predicated kill */ -#define C_OPCODE_PK2H 40 -#define C_OPCODE_PK2US 41 -#define C_OPCODE_PK4B 42 -#define C_OPCODE_PK4UB 43 -#define C_OPCODE_RFL 44 -#define C_OPCODE_SEQ 45 -#define C_OPCODE_SFL 46 -#define C_OPCODE_SGT 47 -#define C_OPCODE_SIN 48 -#define C_OPCODE_SLE 49 -#define C_OPCODE_SNE 50 -#define C_OPCODE_STR 51 -#define C_OPCODE_TEX 52 -#define C_OPCODE_TXD 53 -#define C_OPCODE_TXP 54 -#define C_OPCODE_UP2H 55 -#define C_OPCODE_UP2US 56 -#define C_OPCODE_UP4B 57 -#define C_OPCODE_UP4UB 58 -#define C_OPCODE_X2D 59 -#define C_OPCODE_ARA 60 -#define C_OPCODE_ARR 61 -#define C_OPCODE_BRA 62 -#define C_OPCODE_CAL 63 -#define C_OPCODE_RET 64 -#define C_OPCODE_SSG 65 /* SGN */ -#define C_OPCODE_CMP 66 -#define C_OPCODE_SCS 67 -#define C_OPCODE_TXB 68 -#define C_OPCODE_NRM 69 -#define C_OPCODE_DIV 70 -#define C_OPCODE_DP2 71 -#define C_OPCODE_TXL 72 -#define C_OPCODE_BRK 73 -#define C_OPCODE_IF 74 -#define C_OPCODE_BGNFOR 75 -#define C_OPCODE_REP 76 -#define C_OPCODE_ELSE 77 -#define C_OPCODE_ENDIF 78 -#define C_OPCODE_ENDFOR 79 -#define C_OPCODE_ENDREP 80 -#define C_OPCODE_PUSHA 81 -#define C_OPCODE_POPA 82 -#define C_OPCODE_CEIL 83 -#define C_OPCODE_I2F 84 -#define C_OPCODE_NOT 85 -#define C_OPCODE_TRUNC 86 -#define C_OPCODE_SHL 87 -/* gap */ -#define C_OPCODE_AND 89 -#define C_OPCODE_OR 90 -#define C_OPCODE_MOD 91 -#define C_OPCODE_XOR 92 -#define C_OPCODE_SAD 93 -#define C_OPCODE_TXF 94 -#define C_OPCODE_TXQ 95 -#define C_OPCODE_CONT 96 -#define C_OPCODE_EMIT 97 -#define C_OPCODE_ENDPRIM 98 -#define C_OPCODE_BGNLOOP 99 -#define C_OPCODE_BGNSUB 100 -#define C_OPCODE_ENDLOOP 101 -#define C_OPCODE_ENDSUB 102 -/* gap */ -#define C_OPCODE_NRM4 112 -#define C_OPCODE_CALLNZ 113 -#define C_OPCODE_IFC 114 -#define C_OPCODE_BREAKC 115 -#define C_OPCODE_KIL 116 /* conditional kill */ -#define C_OPCODE_END 117 /* aka HALT */ -/* gap */ -#define C_OPCODE_F2I 119 -#define C_OPCODE_IDIV 120 -#define C_OPCODE_IMAX 121 -#define C_OPCODE_IMIN 122 -#define C_OPCODE_INEG 123 -#define C_OPCODE_ISGE 124 -#define C_OPCODE_ISHR 125 -#define C_OPCODE_ISLT 126 -#define C_OPCODE_F2U 127 -#define C_OPCODE_U2F 128 -#define C_OPCODE_UADD 129 -#define C_OPCODE_UDIV 130 -#define C_OPCODE_UMAD 131 -#define C_OPCODE_UMAX 132 -#define C_OPCODE_UMIN 133 -#define C_OPCODE_UMOD 134 -#define C_OPCODE_UMUL 135 -#define C_OPCODE_USEQ 136 -#define C_OPCODE_USGE 137 -#define C_OPCODE_USHR 138 -#define C_OPCODE_USLT 139 -#define C_OPCODE_USNE 140 -#define C_OPCODE_SWITCH 141 -#define C_OPCODE_CASE 142 -#define C_OPCODE_DEFAULT 143 -#define C_OPCODE_ENDSWITCH 144 -#define C_OPCODE_VFETCH 145 -#define C_OPCODE_ENTRY 146 -#define C_OPCODE_ARL 147 -#define C_OPCODE_LAST 148 - -#define C_OPERAND_FLAG_ABS (1 << 0) -#define C_OPERAND_FLAG_NEG (1 << 1) - -struct c_operand { - struct c_vector *vector; - unsigned swizzle; - unsigned flag; -}; - -struct c_op { - unsigned ninput; - struct c_operand input[3]; - struct c_operand output; - unsigned opcode; -}; - -struct c_instruction { - struct list_head head; - unsigned nop; - struct c_op op[5]; -}; - -struct c_node; - -struct c_node_link { - struct list_head head; - struct c_node *node; -}; - -/** - * struct c_node - * - * @next: all node are in a double linked list, this point to - * next node - * @next: all node are in a double linked list, this point to - * previous node - * @predecessors: list of all predecessor nodes in the flow graph - * @successors: list of all sucessor nodes in the flow graph - * @parent: parent node in the depth first walk tree - * @childs: child nodes in the depth first walk tree - */ -struct c_node { - struct list_head head; - struct list_head predecessors; - struct list_head successors; - struct list_head childs; - struct c_node *parent; - struct list_head insts; - unsigned opcode; - unsigned visited; - unsigned done; - void *backend; -}; - -struct c_file { - unsigned nvectors; - struct list_head vectors; -}; - -struct c_shader { - unsigned nvectors; - struct c_file files[C_FILE_COUNT]; - struct list_head nodes; - struct c_node entry; - struct c_node end; - unsigned type; -}; - -int c_shader_init(struct c_shader *shader, unsigned type); -void c_shader_destroy(struct c_shader *shader); -struct c_vector *c_shader_vector_new(struct c_shader *shader, unsigned file, unsigned name, int sid); -int c_shader_build_dominator_tree(struct c_shader *shader); -void c_shader_dump(struct c_shader *shader); - -void c_node_init(struct c_node *node); -int c_node_add_new_instruction(struct c_node *node, struct c_instruction *instruction); -int c_node_add_new_instruction_head(struct c_node *node, struct c_instruction *instruction); - -/* control flow graph functions */ -int c_node_cfg_link(struct c_node *predecessor, struct c_node *successor); -struct c_node *c_node_cfg_new_after(struct c_node *predecessor); -struct c_node *c_shader_cfg_new_node_after(struct c_shader *shader, struct c_node *predecessor); - -struct c_vector *c_vector_new(void); - -#endif diff --git a/src/gallium/drivers/r600/r600_compiler_dump.c b/src/gallium/drivers/r600/r600_compiler_dump.c deleted file mode 100644 index bb022b7c29..0000000000 --- a/src/gallium/drivers/r600/r600_compiler_dump.c +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#include -#include -#include -#include -#include "r600_compiler.h" - -static const char *c_file_swz[] = { - "x", - "y", - "z", - "w", - "0", - "1", - ".", -}; - -static const char *c_file_str[] = { - "NULL", - "CONSTANT", - "INPUT", - "OUTPUT", - "TEMPORARY", - "SAMPLER", - "ADDRESS", - "IMMEDIATE", - "LOOP", - "PREDICATE", - "SYSTEM_VALUE", -}; - -static const char *c_semantic_str[] = { - "POSITION", - "COLOR", - "BCOLOR", - "FOG", - "PSIZE", - "GENERIC", - "NORMAL", - "FACE", - "EDGEFLAG", - "PRIMID", - "INSTANCEID", -}; - -static const char *c_opcode_str[] = { - "ARL", - "MOV", - "LIT", - "RCP", - "RSQ", - "EXP", - "LOG", - "MUL", - "ADD", - "DP3", - "DP4", - "DST", - "MIN", - "MAX", - "SLT", - "SGE", - "MAD", - "SUB", - "LRP", - "CND", - "(INVALID)", - "DP2A", - "(INVALID)", - "(INVALID)", - "FRC", - "CLAMP", - "FLR", - "ROUND", - "EX2", - "LG2", - "POW", - "XPD", - "(INVALID)", - "ABS", - "RCC", - "DPH", - "COS", - "DDX", - "DDY", - "KILP", - "PK2H", - "PK2US", - "PK4B", - "PK4UB", - "RFL", - "SEQ", - "SFL", - "SGT", - "SIN", - "SLE", - "SNE", - "STR", - "TEX", - "TXD", - "TXP", - "UP2H", - "UP2US", - "UP4B", - "UP4UB", - "X2D", - "ARA", - "ARR", - "BRA", - "CAL", - "RET", - "SSG", - "CMP", - "SCS", - "TXB", - "NRM", - "DIV", - "DP2", - "TXL", - "BRK", - "IF", - "BGNFOR", - "REP", - "ELSE", - "ENDIF", - "ENDFOR", - "ENDREP", - "PUSHA", - "POPA", - "CEIL", - "I2F", - "NOT", - "TRUNC", - "SHL", - "(INVALID)", - "AND", - "OR", - "MOD", - "XOR", - "SAD", - "TXF", - "TXQ", - "CONT", - "EMIT", - "ENDPRIM", - "BGNLOOP", - "BGNSUB", - "ENDLOOP", - "ENDSUB", - "(INVALID)", - "(INVALID)", - "(INVALID)", - "(INVALID)", - "NOP", - "(INVALID)", - "(INVALID)", - "(INVALID)", - "(INVALID)", - "NRM4", - "CALLNZ", - "IFC", - "BREAKC", - "KIL", - "END", - "(INVALID)", - "F2I", - "IDIV", - "IMAX", - "IMIN", - "INEG", - "ISGE", - "ISHR", - "ISLT", - "F2U", - "U2F", - "UADD", - "UDIV", - "UMAD", - "UMAX", - "UMIN", - "UMOD", - "UMUL", - "USEQ", - "USGE", - "USHR", - "USLT", - "USNE", - "SWITCH", - "CASE", - "DEFAULT", - "ENDSWITCH", - "VFETCH", - "ENTRY", -}; - -static inline const char *c_get_name(const char *name[], unsigned i) -{ - return name[i]; -} - -static void pindent(unsigned indent) -{ - unsigned i; - for (i = 0; i < indent; i++) - fprintf(stderr, " "); -} - -static void c_node_dump(struct c_node *node, unsigned indent) -{ - struct c_instruction *i; - unsigned j, k; - - pindent(indent); fprintf(stderr, "# node %s\n", c_get_name(c_opcode_str, node->opcode)); - LIST_FOR_EACH_ENTRY(i, &node->insts, head) { - for (k = 0; k < i->nop; k++) { - pindent(indent); - fprintf(stderr, "%s", c_get_name(c_opcode_str, i->op[k].opcode)); - fprintf(stderr, " %s[%d][%s]", - c_get_name(c_file_str, i->op[k].output.vector->file), - i->op[k].output.vector->id, - c_get_name(c_file_swz, i->op[k].output.swizzle)); - for (j = 0; j < i->op[k].ninput; j++) { - fprintf(stderr, " %s[%d][%s]", - c_get_name(c_file_str, i->op[k].input[j].vector->file), - i->op[k].input[j].vector->id, - c_get_name(c_file_swz, i->op[k].input[j].swizzle)); - } - fprintf(stderr, ";\n"); - } - } -} - -static void c_shader_dump_rec(struct c_shader *shader, struct c_node *node, unsigned indent) -{ - struct c_node_link *link; - - c_node_dump(node, indent); - LIST_FOR_EACH_ENTRY(link, &node->childs, head) { - c_shader_dump_rec(shader, link->node, indent + 1); - } -} - -void c_shader_dump(struct c_shader *shader) -{ - c_shader_dump_rec(shader, &shader->entry, 0); -} diff --git a/src/gallium/drivers/r600/r600_compiler_r600.c b/src/gallium/drivers/r600/r600_compiler_r600.c deleted file mode 100644 index 27ad8f1a18..0000000000 --- a/src/gallium/drivers/r600/r600_compiler_r600.c +++ /dev/null @@ -1,972 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#include -#include -#include -#include -#include -#include -#include "r600_screen.h" -#include "r600_context.h" -#include "r600_sq.h" - - -struct r600_alu_instruction { - unsigned copcode; - enum r600_instruction instruction; -}; - -static int r600_shader_alu_translate(struct r600_shader *rshader, - struct r600_shader_node *node, - struct c_instruction *instruction); -struct r600_alu_instruction r600_alu_instruction[C_OPCODE_LAST]; -struct r600_instruction_info r600_instruction_info[]; - -int r600_shader_insert_fetch(struct c_shader *shader) -{ - struct c_vector *vi, *vr, *v, *nv; - struct c_instruction instruction; - int r; - - if (shader->type != C_PROGRAM_TYPE_VS) - return 0; - vi = c_shader_vector_new(shader, C_FILE_INPUT, C_SEMANTIC_VERTEXID, -1); - if (vi == NULL) - return -ENOMEM; - LIST_FOR_EACH_ENTRY_SAFE(v, nv, &shader->files[C_FILE_INPUT].vectors, head) { - if (v == vi) - continue; - vr = c_shader_vector_new(shader, C_FILE_RESOURCE, C_SEMANTIC_GENERIC, -1); - if (vr == NULL) - return -ENOMEM; - memset(&instruction, 0, sizeof(struct c_instruction)); - instruction.nop = 4; - instruction.op[0].opcode = C_OPCODE_VFETCH; - instruction.op[1].opcode = C_OPCODE_VFETCH; - instruction.op[2].opcode = C_OPCODE_VFETCH; - instruction.op[3].opcode = C_OPCODE_VFETCH; - instruction.op[0].ninput = 2; - instruction.op[1].ninput = 2; - instruction.op[2].ninput = 2; - instruction.op[3].ninput = 2; - instruction.op[0].output.vector = v; - instruction.op[1].output.vector = v; - instruction.op[2].output.vector = v; - instruction.op[3].output.vector = v; - instruction.op[0].input[0].vector = vi; - instruction.op[0].input[1].vector = vr; - instruction.op[1].input[0].vector = vi; - instruction.op[1].input[1].vector = vr; - instruction.op[2].input[0].vector = vi; - instruction.op[2].input[1].vector = vr; - instruction.op[3].input[0].vector = vi; - instruction.op[3].input[1].vector = vr; - instruction.op[0].output.swizzle = C_SWIZZLE_X; - instruction.op[1].output.swizzle = C_SWIZZLE_Y; - instruction.op[2].output.swizzle = C_SWIZZLE_Z; - instruction.op[3].output.swizzle = C_SWIZZLE_W; - r = c_node_add_new_instruction_head(&shader->entry, &instruction); - if (r) - return r; - LIST_DEL(&v->head); - shader->files[C_FILE_INPUT].nvectors--; - LIST_ADDTAIL(&v->head, &shader->files[C_FILE_TEMPORARY].vectors); - shader->files[C_FILE_TEMPORARY].nvectors++; - v->file = C_FILE_TEMPORARY; - } - return 0; -} - -void r600_shader_cleanup(struct r600_shader *rshader) -{ - struct r600_shader_node *n, *nn; - struct r600_shader_vfetch *vf, *nvf; - struct r600_shader_alu *alu, *nalu; - int i; - - if (rshader == NULL) - return; - if (rshader->gpr) { - for (i = 0; i < rshader->nvector; i++) { - free(rshader->gpr[i]); - } - free(rshader->gpr); - rshader->gpr = NULL; - } - LIST_FOR_EACH_ENTRY_SAFE(n, nn, &rshader->nodes, head) { - LIST_DEL(&n->head); - LIST_FOR_EACH_ENTRY_SAFE(vf, nvf, &n->vfetch, head) { - LIST_DEL(&vf->head); - free(vf); - } - LIST_FOR_EACH_ENTRY_SAFE(alu, nalu, &n->alu, head) { - LIST_DEL(&alu->head); - free(alu); - } - free(n); - } - free(rshader->bcode); - return; -} - -int r600_shader_vfetch_bytecode(struct r600_shader *rshader, - struct r600_shader_node *rnode, - struct r600_shader_vfetch *vfetch, - unsigned *cid) -{ - unsigned id = *cid; - - vfetch->cf_addr = id; - rshader->bcode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vfetch->src[1].sel) | - S_SQ_VTX_WORD0_SRC_GPR(vfetch->src[0].sel) | - S_SQ_VTX_WORD0_SRC_SEL_X(vfetch->src[0].sel) | - S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(0x1F); - rshader->bcode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vfetch->dst[0].chan) | - S_SQ_VTX_WORD1_DST_SEL_Y(vfetch->dst[1].chan) | - S_SQ_VTX_WORD1_DST_SEL_Z(vfetch->dst[2].chan) | - S_SQ_VTX_WORD1_DST_SEL_W(vfetch->dst[3].chan) | - S_SQ_VTX_WORD1_USE_CONST_FIELDS(1) | - S_SQ_VTX_WORD1_GPR_DST_GPR(vfetch->dst[0].sel); - rshader->bcode[id++] = S_SQ_VTX_WORD2_MEGA_FETCH(1); - rshader->bcode[id++] = 0; - *cid = id; - return 0; -} - -int r600_shader_update(struct r600_shader *rshader, enum pipe_format *resource_format) -{ - struct r600_shader_node *rnode; - struct r600_shader_vfetch *vfetch; - unsigned i; - - memcpy(rshader->resource_format, resource_format, - rshader->nresource * sizeof(enum pipe_format)); - LIST_FOR_EACH_ENTRY(rnode, &rshader->nodes, head) { - LIST_FOR_EACH_ENTRY(vfetch, &rnode->vfetch, head) { - const struct util_format_description *desc; - i = vfetch->cf_addr + 1; - rshader->bcode[i] &= C_SQ_VTX_WORD1_DST_SEL_X; - rshader->bcode[i] &= C_SQ_VTX_WORD1_DST_SEL_Y; - rshader->bcode[i] &= C_SQ_VTX_WORD1_DST_SEL_Z; - rshader->bcode[i] &= C_SQ_VTX_WORD1_DST_SEL_W; - desc = util_format_description(resource_format[vfetch->src[1].sel]); - if (desc == NULL) { - fprintf(stderr, "%s unknown format %d\n", __func__, resource_format[vfetch->src[1].sel]); - continue; - } - /* WARNING so far TGSI swizzle match R600 ones */ - rshader->bcode[i] |= S_SQ_VTX_WORD1_DST_SEL_X(desc->swizzle[0]); - rshader->bcode[i] |= S_SQ_VTX_WORD1_DST_SEL_Y(desc->swizzle[1]); - rshader->bcode[i] |= S_SQ_VTX_WORD1_DST_SEL_Z(desc->swizzle[2]); - rshader->bcode[i] |= S_SQ_VTX_WORD1_DST_SEL_W(desc->swizzle[3]); - } - } - return 0; -} - -int r600_shader_register(struct r600_shader *rshader) -{ - struct c_vector *v, *nv; - unsigned tid, cid, rid, i; - - rshader->nvector = rshader->cshader.nvectors; - rshader->gpr = calloc(rshader->nvector, sizeof(void*)); - if (rshader->gpr == NULL) - return -ENOMEM; - tid = 0; - cid = 0; - rid = 0; - /* alloc input first */ - LIST_FOR_EACH_ENTRY(v, &rshader->cshader.files[C_FILE_INPUT].vectors, head) { - nv = c_vector_new(); - if (nv == NULL) { - return -ENOMEM; - } - memcpy(nv, v, sizeof(struct c_vector)); - nv->id = tid++; - rshader->gpr[v->id] = nv; - } - for (i = 0; i < C_FILE_COUNT; i++) { - if (i == C_FILE_INPUT || i == C_FILE_IMMEDIATE) - continue; - LIST_FOR_EACH_ENTRY(v, &rshader->cshader.files[i].vectors, head) { - switch (v->file) { - case C_FILE_OUTPUT: - case C_FILE_TEMPORARY: - nv = c_vector_new(); - if (nv == NULL) { - return -ENOMEM; - } - memcpy(nv, v, sizeof(struct c_vector)); - nv->id = tid++; - rshader->gpr[v->id] = nv; - break; - case C_FILE_CONSTANT: - nv = c_vector_new(); - if (nv == NULL) { - return -ENOMEM; - } - memcpy(nv, v, sizeof(struct c_vector)); - nv->id = (cid++) + 256; - rshader->gpr[v->id] = nv; - break; - case C_FILE_RESOURCE: - nv = c_vector_new(); - if (nv == NULL) { - return -ENOMEM; - } - memcpy(nv, v, sizeof(struct c_vector)); - nv->id = (rid++); - rshader->gpr[v->id] = nv; - break; - default: - fprintf(stderr, "%s:%d unsupported file %d\n", __func__, __LINE__, v->file); - return -EINVAL; - } - } - } - rshader->ngpr = tid; - rshader->nconstant = cid; - rshader->nresource = rid; - return 0; -} - -int r600_shader_find_gpr(struct r600_shader *rshader, struct c_vector *v, unsigned swizzle, - struct r600_shader_operand *operand) -{ - struct c_vector *tmp; - - /* Values [0,127] correspond to GPR[0..127]. - * Values [256,511] correspond to cfile constants c[0..255]. - * Other special values are shown in the list below. - * 248 SQ_ALU_SRC_0: special constant 0.0. - * 249 SQ_ALU_SRC_1: special constant 1.0 float. - * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. - * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. - * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. - * 253 SQ_ALU_SRC_LITERAL: literal constant. - * 254 SQ_ALU_SRC_PV: previous vector result. - * 255 SQ_ALU_SRC_PS: previous scalar result. - */ - operand->vector = v; - operand->sel = 248; - operand->chan = 0; - operand->neg = 0; - operand->abs = 0; - if (v == NULL) - return 0; - if (v->file == C_FILE_IMMEDIATE) { - operand->sel = 253; - } else { - tmp = rshader->gpr[v->id]; - if (tmp == NULL) { - fprintf(stderr, "%s %d unknown register\n", __FILE__, __LINE__); - return -EINVAL; - } - operand->sel = tmp->id; - } - operand->chan = swizzle; - switch (swizzle) { - case C_SWIZZLE_X: - case C_SWIZZLE_Y: - case C_SWIZZLE_Z: - case C_SWIZZLE_W: - break; - case C_SWIZZLE_0: - operand->sel = 248; - operand->chan = 0; - break; - case C_SWIZZLE_1: - operand->sel = 249; - operand->chan = 0; - break; - default: - fprintf(stderr, "%s %d invalid swizzle %d\n", __FILE__, __LINE__, swizzle); - return -EINVAL; - } - return 0; -} - -static struct r600_shader_node *r600_shader_new_node(struct r600_shader *rshader, struct c_node *node) -{ - struct r600_shader_node *rnode; - - rnode = CALLOC_STRUCT(r600_shader_node); - if (rnode == NULL) - return NULL; - rnode->node = node; - LIST_INITHEAD(&rnode->vfetch); -fprintf(stderr, "------------------------ new node (%p %p)\n", &rnode->vfetch, rnode->vfetch.next); - LIST_INITHEAD(&rnode->alu); - LIST_ADDTAIL(&rnode->head, &rshader->nodes); - return rnode; -} - -static int r600_shader_add_vfetch(struct r600_shader *rshader, - struct r600_shader_node *node, - struct c_instruction *instruction) -{ - struct r600_shader_vfetch *vfetch; - struct r600_shader_node *rnode; - int r; - - if (instruction == NULL) - return 0; - if (instruction->op[0].opcode != C_OPCODE_VFETCH) - return 0; - if (!LIST_IS_EMPTY(&node->alu)) { - rnode = r600_shader_new_node(rshader, node->node); - if (rnode == NULL) - return -ENOMEM; - node = rnode; - } - vfetch = calloc(1, sizeof(struct r600_shader_vfetch)); - if (vfetch == NULL) - return -ENOMEM; - r = r600_shader_find_gpr(rshader, instruction->op[0].output.vector, 0, &vfetch->dst[0]); - if (r) - return r; - r = r600_shader_find_gpr(rshader, instruction->op[0].input[0].vector, 0, &vfetch->src[0]); - if (r) - return r; - r = r600_shader_find_gpr(rshader, instruction->op[0].input[1].vector, 0, &vfetch->src[1]); - if (r) - return r; - vfetch->dst[0].chan = C_SWIZZLE_X; - vfetch->dst[1].chan = C_SWIZZLE_Y; - vfetch->dst[2].chan = C_SWIZZLE_Z; - vfetch->dst[3].chan = C_SWIZZLE_W; - LIST_ADDTAIL(&vfetch->head, &node->vfetch); - node->nslot += 2; - return 0; -} - -static int r600_node_translate(struct r600_shader *rshader, struct c_node *node) -{ - struct c_instruction *instruction; - struct r600_shader_node *rnode; - int r; - - rnode = r600_shader_new_node(rshader, node); - if (rnode == NULL) - return -ENOMEM; - LIST_FOR_EACH_ENTRY(instruction, &node->insts, head) { - switch (instruction->op[0].opcode) { - case C_OPCODE_VFETCH: - r = r600_shader_add_vfetch(rshader, rnode, instruction); - if (r) { - fprintf(stderr, "%s %d vfetch failed\n", __func__, __LINE__); - return r; - } - break; - default: - r = r600_shader_alu_translate(rshader, rnode, instruction); - if (r) { - fprintf(stderr, "%s %d alu failed\n", __func__, __LINE__); - return r; - } - break; - } - } - return 0; -} - -int r600_shader_translate_rec(struct r600_shader *rshader, struct c_node *node) -{ - struct c_node_link *link; - int r; - - if (node->opcode == C_OPCODE_END) - return 0; - r = r600_node_translate(rshader, node); - if (r) - return r; - LIST_FOR_EACH_ENTRY(link, &node->childs, head) { - r = r600_shader_translate_rec(rshader, link->node); - if (r) - return r; - } - return 0; -} - -static struct r600_shader_alu *r600_shader_insert_alu(struct r600_shader *rshader, struct r600_shader_node *node) -{ - struct r600_shader_alu *alu; - - alu = CALLOC_STRUCT(r600_shader_alu); - if (alu == NULL) - return NULL; - alu->alu[0].inst = INST_NOP; - alu->alu[1].inst = INST_NOP; - alu->alu[2].inst = INST_NOP; - alu->alu[3].inst = INST_NOP; - alu->alu[4].inst = INST_NOP; - alu->alu[0].opcode = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; - alu->alu[1].opcode = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; - alu->alu[2].opcode = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; - alu->alu[3].opcode = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; - alu->alu[4].opcode = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; - alu->alu[1].dst.chan = 1; - alu->alu[2].dst.chan = 2; - alu->alu[3].dst.chan = 3; - LIST_ADDTAIL(&alu->head, &node->alu); - return alu; -} - -static int r600_shader_alu_translate(struct r600_shader *rshader, - struct r600_shader_node *node, - struct c_instruction *instruction) -{ - struct r600_shader_node *rnode; - struct r600_shader_alu *alu; - int i, j, r, litteral_lastcomp = -1; - - if (!LIST_IS_EMPTY(&node->vfetch)) { -fprintf(stderr, "------------------------ add node (%p %p)\n", &node->vfetch, node->vfetch.next); - rnode = r600_shader_new_node(rshader, node->node); - if (rnode == NULL) { - fprintf(stderr, "%s %d new node failed\n", __func__, __LINE__); - return -ENOMEM; - } - node = rnode; - } - - /* initialize alu */ - alu = r600_shader_insert_alu(rshader, node); - - /* check special operation like lit */ - - /* go through operation */ - for (i = 0; i < instruction->nop; i++) { - struct r600_alu_instruction *ainfo = &r600_alu_instruction[instruction->op[i].opcode]; - struct r600_instruction_info *iinfo = &r600_instruction_info[ainfo->instruction]; - unsigned comp; - - /* check that output is a valid component */ - comp = instruction->op[i].output.swizzle; - switch (comp) { - case C_SWIZZLE_X: - case C_SWIZZLE_Y: - case C_SWIZZLE_Z: - case C_SWIZZLE_W: - break; - case C_SWIZZLE_0: - case C_SWIZZLE_1: - default: - fprintf(stderr, "%s %d invalid output %d\n", __func__, __LINE__, comp); - return -EINVAL; - } - alu->alu[comp].inst = ainfo->instruction; - alu->alu[comp].opcode = iinfo->opcode; - alu->alu[comp].is_op3 = iinfo->is_op3; - for (j = 0; j < instruction->op[i].ninput; j++) { - r = r600_shader_find_gpr(rshader, instruction->op[i].input[j].vector, - instruction->op[i].input[j].swizzle, &alu->alu[comp].src[j]); - if (r) { - fprintf(stderr, "%s %d register failed\n", __FILE__, __LINE__); - return r; - } - if (instruction->op[i].input[j].vector->file == C_FILE_IMMEDIATE) { - r = instruction->op[i].input[j].swizzle; - switch (r) { - case C_SWIZZLE_X: - case C_SWIZZLE_Y: - case C_SWIZZLE_Z: - case C_SWIZZLE_W: - break; - case C_SWIZZLE_0: - case C_SWIZZLE_1: - default: - fprintf(stderr, "%s %d invalid input\n", __func__, __LINE__); - return -EINVAL; - } - alu->literal[r] = instruction->op[i].input[j].vector->channel[r]->value; - if (r > litteral_lastcomp) { - litteral_lastcomp = r; - } - } - } - r = r600_shader_find_gpr(rshader, instruction->op[i].output.vector, - instruction->op[i].output.swizzle, &alu->alu[comp].dst); - if (r) { - fprintf(stderr, "%s %d register failed\n", __FILE__, __LINE__); - return r; - } - } - switch (litteral_lastcomp) { - case 0: - case 1: - alu->nliteral = 2; - break; - case 2: - case 3: - alu->nliteral = 4; - break; - case -1: - default: - break; - } - for (i = 4; i >= 0; i--) { - if (alu->alu[i].inst != INST_NOP) { - alu->alu[i].last = 1; - alu->nalu = i + 1; - break; - } - } - return 0; -} - -void r600_shader_node_place(struct r600_shader *rshader) -{ - struct r600_shader_node *node, *nnode; - struct r600_shader_alu *alu, *nalu; - struct r600_shader_vfetch *vfetch, *nvfetch; - unsigned cf_id = 0, cf_addr = 0; - - rshader->ncf = 0; - rshader->nslot = 0; - LIST_FOR_EACH_ENTRY_SAFE(node, nnode, &rshader->nodes, head) { - LIST_FOR_EACH_ENTRY_SAFE(alu, nalu, &node->alu, head) { - node->nslot += alu->nalu; - node->nslot += alu->nliteral >> 1; - } - node->nfetch = 0; - LIST_FOR_EACH_ENTRY_SAFE(vfetch, nvfetch, &node->vfetch, head) { - node->nslot += 2; - node->nfetch += 1; - } - if (!LIST_IS_EMPTY(&node->vfetch)) { - /* fetch node need to be 16 bytes aligned*/ - cf_addr += 1; - cf_addr &= 0xFFFFFFFEUL; - } - node->cf_id = cf_id; - node->cf_addr = cf_addr; - cf_id += 2; - cf_addr += node->nslot * 2; - rshader->ncf++; - } - rshader->nslot = cf_addr; - LIST_FOR_EACH_ENTRY_SAFE(node, nnode, &rshader->nodes, head) { - node->cf_addr += cf_id * 2; - } - rshader->ncf += rshader->cshader.files[C_FILE_OUTPUT].nvectors; - rshader->ndw = rshader->ncf * 2 + rshader->nslot * 2; -} - -int r600_shader_legalize(struct r600_shader *rshader) -{ - return 0; -} - - -static int r600_cshader_legalize_rec(struct c_shader *shader, struct c_node *node) -{ - struct c_node_link *link; - struct c_instruction *i, *n; - struct c_operand operand; - unsigned k, inst; - int r; - - LIST_FOR_EACH_ENTRY(i, &node->insts, head) { - for (k = 0; k < i->nop; k++) { - switch (i->op[k].opcode) { - case C_OPCODE_SLT: - i->op[k].opcode = C_OPCODE_SGT; - memcpy(&operand, &i->op[k].input[0], sizeof(struct c_operand)); - memcpy(&i->op[k].input[0], &i->op[k].input[1], sizeof(struct c_operand)); - memcpy(&i->op[k].input[1], &operand, sizeof(struct c_operand)); - break; - default: - break; - } - inst = r600_alu_instruction[i->op[k].opcode].instruction; - if (r600_instruction_info[inst].is_trans && k < (i->nop -1)) { - /* split trans opcode */ - n = CALLOC_STRUCT(c_instruction); - if (n == NULL) - return -ENOMEM; - for (n->nop = 0, k = k + 1; k < i->nop; k++, n->nop++) { - memcpy(&n->op[n->nop - 0], &i->op[k], sizeof(struct c_op)); - } - i->nop -= n->nop; - LIST_ADD(&n->head, &i->head); - } - } - } - LIST_FOR_EACH_ENTRY(link, &node->childs, head) { - r = r600_cshader_legalize_rec(shader, link->node); - if (r) { - return r; - } - } - return 0; -} - -int r600_cshader_legalize(struct c_shader *shader) -{ - return r600_cshader_legalize_rec(shader, &shader->entry); -} - - -struct r600_instruction_info r600_instruction_info[] = { - {INST_ADD, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, 0, 0}, - {INST_MUL, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, 0, 0}, - {INST_MUL_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE, 0, 0}, - {INST_MAX, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, 0, 0}, - {INST_MIN, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, 0, 0}, - {INST_MAX_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_DX10, 0, 0}, - {INST_MIN_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_DX10, 0, 0}, - {INST_SETE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, 0, 0}, - {INST_SETGT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, 0, 0}, - {INST_SETGE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, 0, 0}, - {INST_SETNE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, 0, 0}, - {INST_SETE_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_DX10, 0, 0}, - {INST_SETGT_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_DX10, 0, 0}, - {INST_SETGE_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_DX10, 0, 0}, - {INST_SETNE_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_DX10, 0, 0}, - {INST_FRACT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, 0, 0}, - {INST_TRUNC, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, 0, 0}, - {INST_CEIL, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, 0, 0}, - {INST_RNDNE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, 0, 0}, - {INST_FLOOR, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, 0, 0}, - {INST_MOVA, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA, 0, 0}, - {INST_MOVA_FLOOR, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR, 0, 0}, - {INST_MOVA_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, 0, 0}, - {INST_MOV, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, 0, 0}, - {INST_NOP, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 0, 0}, - {INST_PRED_SETGT_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT, 0, 0}, - {INST_PRED_SETGE_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT, 0, 0}, - {INST_PRED_SETE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE, 0, 0}, - {INST_PRED_SETGT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT, 0, 0}, - {INST_PRED_SETGE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE, 0, 0}, - {INST_PRED_SETNE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE, 0, 0}, - {INST_PRED_SET_INV, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV, 0, 0}, - {INST_PRED_SET_POP, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP, 0, 0}, - {INST_PRED_SET_CLR, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR, 0, 0}, - {INST_PRED_SET_RESTORE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE, 0, 0}, - {INST_PRED_SETE_PUSH, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH, 0, 0}, - {INST_PRED_SETGT_PUSH, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH, 0, 0}, - {INST_PRED_SETGE_PUSH, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH, 0, 0}, - {INST_PRED_SETNE_PUSH, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH, 0, 0}, - {INST_KILLE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE, 0, 0}, - {INST_KILLGT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, 0, 0}, - {INST_KILLGE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE, 0, 0}, - {INST_KILLNE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE, 0, 0}, - {INST_AND_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, 0, 0}, - {INST_OR_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, 0, 0}, - {INST_XOR_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, 0, 0}, - {INST_NOT_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, 0, 0}, - {INST_ADD_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, 0, 0}, - {INST_SUB_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, 0, 0}, - {INST_MAX_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, 0, 0}, - {INST_MIN_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, 0, 0}, - {INST_MAX_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, 0, 0}, - {INST_MIN_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, 0, 0}, - {INST_SETE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, 0, 0}, - {INST_SETGT_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, 0, 0}, - {INST_SETGE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, 0, 0}, - {INST_SETNE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, 0, 0}, - {INST_SETGT_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, 0, 0}, - {INST_SETGE_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, 0, 0}, - {INST_KILLGT_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT, 0, 0}, - {INST_KILLGE_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT, 0, 0}, - {INST_PRED_SETE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT, 0, 0}, - {INST_PRED_SETGT_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT, 0, 0}, - {INST_PRED_SETGE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT, 0, 0}, - {INST_PRED_SETNE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT, 0, 0}, - {INST_KILLE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT, 0, 0}, - {INST_KILLGT_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT, 0, 0}, - {INST_KILLGE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT, 0, 0}, - {INST_KILLNE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT, 0, 0}, - {INST_PRED_SETE_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT, 0, 0}, - {INST_PRED_SETGT_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT, 0, 0}, - {INST_PRED_SETGE_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT, 0, 0}, - {INST_PRED_SETNE_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT, 0, 0}, - {INST_PRED_SETLT_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT, 0, 0}, - {INST_PRED_SETLE_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT, 0, 0}, - {INST_DOT4, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, 0, 0}, - {INST_DOT4_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE, 0, 0}, - {INST_CUBE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE, 0, 0}, - {INST_MAX4, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4, 0, 0}, - {INST_MOVA_GPR_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT, 0, 0}, - {INST_EXP_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, 1, 0}, - {INST_LOG_CLAMPED, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED, 1, 0}, - {INST_LOG_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, 1, 0}, - {INST_RECIP_CLAMPED, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, 1, 0}, - {INST_RECIP_FF, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF, 1, 0}, - {INST_RECIP_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, 1, 0}, - {INST_RECIPSQRT_CLAMPED, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED, 1, 0}, - {INST_RECIPSQRT_FF, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF, 1, 0}, - {INST_RECIPSQRT_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, 1, 0}, - {INST_SQRT_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE, 1, 0}, - {INST_FLT_TO_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, 1, 0}, - {INST_INT_TO_FLT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, 1, 0}, - {INST_UINT_TO_FLT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, 1, 0}, - {INST_SIN, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, 1, 0}, - {INST_COS, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, 1, 0}, - {INST_ASHR_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, 1, 0}, - {INST_LSHR_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, 1, 0}, - {INST_LSHL_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, 1, 0}, - {INST_MULLO_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT, 1, 0}, - {INST_MULHI_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT, 1, 0}, - {INST_MULLO_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, 1, 0}, - {INST_MULHI_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT, 1, 0}, - {INST_RECIP_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT, 1, 0}, - {INST_RECIP_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT, 1, 0}, - {INST_FLT_TO_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, 1, 0}, - {INST_MUL_LIT, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT, 1, 1}, - {INST_MUL_LIT_M2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2, 1, 1}, - {INST_MUL_LIT_M4, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4, 1, 1}, - {INST_MUL_LIT_D2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_D2, 1, 1}, - {INST_MULADD, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, 0, 1}, - {INST_MULADD_M2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_M2, 0, 1}, - {INST_MULADD_M4, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_M4, 0, 1}, - {INST_MULADD_D2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_D2, 0, 1}, - {INST_MULADD_IEEE, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE, 0, 1}, - {INST_MULADD_IEEE_M2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_M2, 0, 1}, - {INST_MULADD_IEEE_M4, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_M4, 0, 1}, - {INST_MULADD_IEEE_D2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_D2, 0, 1}, - {INST_CNDE, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE, 0, 1}, - {INST_CNDGT, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT, 0, 1}, - {INST_CNDGE, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE, 0, 1}, - {INST_CNDE_INT, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT, 0, 1}, - {INST_CNDGT_INT, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT, 0, 1}, - {INST_CNDGE_INT, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT, 0, 1}, -}; - -struct r600_alu_instruction r600_alu_instruction[C_OPCODE_LAST] = { - {C_OPCODE_NOP, INST_NOP}, - {C_OPCODE_MOV, INST_MOV}, - {C_OPCODE_LIT, INST_NOP}, - {C_OPCODE_RCP, INST_RECIP_IEEE}, - {C_OPCODE_RSQ, INST_RECIPSQRT_IEEE}, - {C_OPCODE_EXP, INST_EXP_IEEE}, - {C_OPCODE_LOG, INST_LOG_IEEE}, - {C_OPCODE_MUL, INST_MUL}, - {C_OPCODE_ADD, INST_ADD}, - {C_OPCODE_DP3, INST_DOT4}, - {C_OPCODE_DP4, INST_DOT4}, - {C_OPCODE_DST, INST_NOP}, - {C_OPCODE_MIN, INST_MIN}, - {C_OPCODE_MAX, INST_MAX}, - {C_OPCODE_SLT, INST_NOP}, - {C_OPCODE_SGE, INST_NOP}, - {C_OPCODE_MAD, INST_MULADD}, - {C_OPCODE_SUB, INST_COUNT}, - {C_OPCODE_LRP, INST_NOP}, - {C_OPCODE_CND, INST_NOP}, - {20, INST_NOP}, - {C_OPCODE_DP2A, INST_NOP}, - {22, INST_NOP}, - {23, INST_NOP}, - {C_OPCODE_FRC, INST_NOP}, - {C_OPCODE_CLAMP, INST_NOP}, - {C_OPCODE_FLR, INST_NOP}, - {C_OPCODE_ROUND, INST_NOP}, - {C_OPCODE_EX2, INST_NOP}, - {C_OPCODE_LG2, INST_NOP}, - {C_OPCODE_POW, INST_NOP}, - {C_OPCODE_XPD, INST_NOP}, - {32, INST_NOP}, - {C_OPCODE_ABS, INST_COUNT}, - {C_OPCODE_RCC, INST_NOP}, - {C_OPCODE_DPH, INST_NOP}, - {C_OPCODE_COS, INST_COS}, - {C_OPCODE_DDX, INST_NOP}, - {C_OPCODE_DDY, INST_NOP}, - {C_OPCODE_KILP, INST_NOP}, - {C_OPCODE_PK2H, INST_NOP}, - {C_OPCODE_PK2US, INST_NOP}, - {C_OPCODE_PK4B, INST_NOP}, - {C_OPCODE_PK4UB, INST_NOP}, - {C_OPCODE_RFL, INST_NOP}, - {C_OPCODE_SEQ, INST_NOP}, - {C_OPCODE_SFL, INST_NOP}, - {C_OPCODE_SGT, INST_SETGT}, - {C_OPCODE_SIN, INST_SIN}, - {C_OPCODE_SLE, INST_NOP}, - {C_OPCODE_SNE, INST_NOP}, - {C_OPCODE_STR, INST_NOP}, - {C_OPCODE_TEX, INST_NOP}, - {C_OPCODE_TXD, INST_NOP}, - {C_OPCODE_TXP, INST_NOP}, - {C_OPCODE_UP2H, INST_NOP}, - {C_OPCODE_UP2US, INST_NOP}, - {C_OPCODE_UP4B, INST_NOP}, - {C_OPCODE_UP4UB, INST_NOP}, - {C_OPCODE_X2D, INST_NOP}, - {C_OPCODE_ARA, INST_NOP}, - {C_OPCODE_ARR, INST_NOP}, - {C_OPCODE_BRA, INST_NOP}, - {C_OPCODE_CAL, INST_NOP}, - {C_OPCODE_RET, INST_NOP}, - {C_OPCODE_SSG, INST_NOP}, - {C_OPCODE_CMP, INST_NOP}, - {C_OPCODE_SCS, INST_NOP}, - {C_OPCODE_TXB, INST_NOP}, - {C_OPCODE_NRM, INST_NOP}, - {C_OPCODE_DIV, INST_NOP}, - {C_OPCODE_DP2, INST_NOP}, - {C_OPCODE_TXL, INST_NOP}, - {C_OPCODE_BRK, INST_NOP}, - {C_OPCODE_IF, INST_NOP}, - {C_OPCODE_BGNFOR, INST_NOP}, - {C_OPCODE_REP, INST_NOP}, - {C_OPCODE_ELSE, INST_NOP}, - {C_OPCODE_ENDIF, INST_NOP}, - {C_OPCODE_ENDFOR, INST_NOP}, - {C_OPCODE_ENDREP, INST_NOP}, - {C_OPCODE_PUSHA, INST_NOP}, - {C_OPCODE_POPA, INST_NOP}, - {C_OPCODE_CEIL, INST_NOP}, - {C_OPCODE_I2F, INST_NOP}, - {C_OPCODE_NOT, INST_NOP}, - {C_OPCODE_TRUNC, INST_NOP}, - {C_OPCODE_SHL, INST_NOP}, - {88, INST_NOP}, - {C_OPCODE_AND, INST_NOP}, - {C_OPCODE_OR, INST_NOP}, - {C_OPCODE_MOD, INST_NOP}, - {C_OPCODE_XOR, INST_NOP}, - {C_OPCODE_SAD, INST_NOP}, - {C_OPCODE_TXF, INST_NOP}, - {C_OPCODE_TXQ, INST_NOP}, - {C_OPCODE_CONT, INST_NOP}, - {C_OPCODE_EMIT, INST_NOP}, - {C_OPCODE_ENDPRIM, INST_NOP}, - {C_OPCODE_BGNLOOP, INST_NOP}, - {C_OPCODE_BGNSUB, INST_NOP}, - {C_OPCODE_ENDLOOP, INST_NOP}, - {C_OPCODE_ENDSUB, INST_NOP}, - {103, INST_NOP}, - {104, INST_NOP}, - {105, INST_NOP}, - {106, INST_NOP}, - {107, INST_NOP}, - {108, INST_NOP}, - {109, INST_NOP}, - {110, INST_NOP}, - {111, INST_NOP}, - {C_OPCODE_NRM4, INST_NOP}, - {C_OPCODE_CALLNZ, INST_NOP}, - {C_OPCODE_IFC, INST_NOP}, - {C_OPCODE_BREAKC, INST_NOP}, - {C_OPCODE_KIL, INST_NOP}, - {C_OPCODE_END, INST_NOP}, - {118, INST_NOP}, - {C_OPCODE_F2I, INST_NOP}, - {C_OPCODE_IDIV, INST_NOP}, - {C_OPCODE_IMAX, INST_NOP}, - {C_OPCODE_IMIN, INST_NOP}, - {C_OPCODE_INEG, INST_NOP}, - {C_OPCODE_ISGE, INST_NOP}, - {C_OPCODE_ISHR, INST_NOP}, - {C_OPCODE_ISLT, INST_NOP}, - {C_OPCODE_F2U, INST_NOP}, - {C_OPCODE_U2F, INST_NOP}, - {C_OPCODE_UADD, INST_NOP}, - {C_OPCODE_UDIV, INST_NOP}, - {C_OPCODE_UMAD, INST_NOP}, - {C_OPCODE_UMAX, INST_NOP}, - {C_OPCODE_UMIN, INST_NOP}, - {C_OPCODE_UMOD, INST_NOP}, - {C_OPCODE_UMUL, INST_NOP}, - {C_OPCODE_USEQ, INST_NOP}, - {C_OPCODE_USGE, INST_NOP}, - {C_OPCODE_USHR, INST_NOP}, - {C_OPCODE_USLT, INST_NOP}, - {C_OPCODE_USNE, INST_NOP}, - {C_OPCODE_SWITCH, INST_NOP}, - {C_OPCODE_CASE, INST_NOP}, - {C_OPCODE_DEFAULT, INST_NOP}, - {C_OPCODE_ENDSWITCH, INST_NOP}, - {C_OPCODE_VFETCH, INST_NOP}, - {C_OPCODE_ENTRY, INST_NOP}, - {C_OPCODE_ARL, INST_NOP}, -}; - - -static int r600_shader_alu_bytecode(struct r600_shader *rshader, - struct r600_shader_node *rnode, - struct r600_shader_inst *alu, - unsigned *cid) -{ - unsigned id = *cid; - - /* don't replace gpr by pv or ps for destination register */ - if (alu->is_op3) { - rshader->bcode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | - S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | - S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | - S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | - S_SQ_ALU_WORD0_LAST(alu->last); - rshader->bcode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | - S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | - S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) | - S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) | - S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) | - S_SQ_ALU_WORD1_OP3_ALU_INST(alu->opcode) | - S_SQ_ALU_WORD1_BANK_SWIZZLE(0); - } else { - rshader->bcode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | - S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | - S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) | - S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | - S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | - S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) | - S_SQ_ALU_WORD0_LAST(alu->last); - rshader->bcode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | - S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | - S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | - S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | - S_SQ_ALU_WORD1_OP2_WRITE_MASK(1) | - S_SQ_ALU_WORD1_OP2_ALU_INST(alu->opcode) | - S_SQ_ALU_WORD1_BANK_SWIZZLE(0); - } - *cid = id; - return 0; -} - -int r6xx_shader_alu_translate(struct r600_shader *rshader, - struct r600_shader_node *rnode, - unsigned *cid) -{ - struct r600_shader_alu *alu; - unsigned id = *cid; - int i; - int r = 0; - LIST_FOR_EACH_ENTRY(alu, &rnode->alu, head) { - for (i = 0; i < alu->nalu; i++) { - r = r600_shader_alu_bytecode(rshader, rnode, &alu->alu[i], &id); - if (r) - goto out; - } - for (i = 0; i < alu->nliteral; i++) { - rshader->bcode[id++] = alu->literal[i]; - } - } -out: - *cid = id; - return r; -} diff --git a/src/gallium/drivers/r600/r600_compiler_r700.c b/src/gallium/drivers/r600/r600_compiler_r700.c deleted file mode 100644 index 0b43942866..0000000000 --- a/src/gallium/drivers/r600/r600_compiler_r700.c +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#include -#include -#include -#include -#include -#include "r600_context.h" -#include "r700_sq.h" - -static int r700_shader_cf_node_bytecode(struct r600_shader *rshader, - struct r600_shader_node *rnode, - unsigned *cid) -{ - unsigned id = *cid; - - if (rnode->nfetch) { - rshader->bcode[id++] = S_SQ_CF_WORD0_ADDR(rnode->cf_addr >> 1); - rshader->bcode[id++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(rnode->nfetch - 1); - } else { - rshader->bcode[id++] = S_SQ_CF_ALU_WORD0_ADDR(rnode->cf_addr >> 1); - rshader->bcode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) | - S_SQ_CF_ALU_WORD1_BARRIER(1) | - S_SQ_CF_ALU_WORD1_COUNT(rnode->nslot - 1); - } - *cid = id; - return 0; -} - -static int r700_shader_cf_output_bytecode(struct r600_shader *rshader, - struct c_vector *v, - unsigned *cid, - unsigned end) -{ - struct r600_shader_operand out; - unsigned id = *cid; - int r; - - r = r600_shader_find_gpr(rshader, v, 0, &out); - if (r) - return r; - rshader->bcode[id + 0] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(out.sel) | - S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(3); - rshader->bcode[id + 1] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(0) | - S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(1) | - S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(2) | - S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(3) | - S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(1) | - S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE) | - S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(end); - switch (v->name) { - case C_SEMANTIC_POSITION: - rshader->bcode[id + 0] |= S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(60) | - S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS); - break; - case C_SEMANTIC_COLOR: - if (rshader->cshader.type == C_PROGRAM_TYPE_VS) { - rshader->output[rshader->noutput].gpr = out.sel; - rshader->output[rshader->noutput].sid = v->sid; - rshader->output[rshader->noutput].name = v->name; - rshader->bcode[id + 0] |= S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(rshader->noutput++) | - S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM); - } else { - rshader->bcode[id + 0] |= S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(0) | - S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL); - } - break; - case C_SEMANTIC_GENERIC: - rshader->output[rshader->noutput].gpr = out.sel; - rshader->output[rshader->noutput].sid = v->sid; - rshader->output[rshader->noutput].name = v->name; - rshader->bcode[id + 0] |= S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(rshader->noutput++) | - S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM); - break; - default: - fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - *cid = id + 2; - return 0; -} - -static int r700_shader_alu_bytecode(struct r600_shader *rshader, - struct r600_shader_node *rnode, - struct r600_shader_inst *alu, - unsigned *cid) -{ - unsigned id = *cid; - - /* don't replace gpr by pv or ps for destination register */ - if (alu->is_op3) { - rshader->bcode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | - S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | - S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | - S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | - S_SQ_ALU_WORD0_LAST(alu->last); - rshader->bcode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | - S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | - S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) | - S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) | - S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) | - S_SQ_ALU_WORD1_OP3_ALU_INST(alu->opcode) | - S_SQ_ALU_WORD1_BANK_SWIZZLE(0); - } else { - rshader->bcode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | - S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | - S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) | - S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | - S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | - S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) | - S_SQ_ALU_WORD0_LAST(alu->last); - rshader->bcode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | - S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | - S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | - S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | - S_SQ_ALU_WORD1_OP2_WRITE_MASK(1) | - S_SQ_ALU_WORD1_OP2_ALU_INST(alu->opcode) | - S_SQ_ALU_WORD1_BANK_SWIZZLE(0); - } - *cid = id; - return 0; -} - -static int r700_shader_alu_translate(struct r600_shader *rshader, - struct r600_shader_node *rnode, - unsigned *cid) - -{ - struct r600_shader_alu *alu; - unsigned id = *cid; - int i; - int r = 0; - LIST_FOR_EACH_ENTRY(alu, &rnode->alu, head) { - for (i = 0; i < alu->nalu; i++) { - r = r700_shader_alu_bytecode(rshader, rnode, &alu->alu[i], &id); - if (r) - goto out; - } - for (i = 0; i < alu->nliteral; i++) { - rshader->bcode[id++] = alu->literal[i]; - } - } - out: - *cid = id; - return r; -} - -int r700_shader_translate(struct r600_shader *rshader) -{ - struct c_shader *shader = &rshader->cshader; - struct r600_shader_node *rnode; - struct r600_shader_vfetch *vfetch; - struct c_vector *v; - unsigned id, end; - int r; - - r = r600_shader_register(rshader); - if (r) { - fprintf(stderr, "%s %d register allocation failed\n", __FILE__, __LINE__); - return r; - } - r = r600_shader_translate_rec(rshader, &shader->entry); - if (r) { - fprintf(stderr, "%s %d translation failed\n", __FILE__, __LINE__); - return r; - } - r = r600_shader_legalize(rshader); - if (r) { - fprintf(stderr, "%s %d legalize failed\n", __FILE__, __LINE__); - return r; - } - r600_shader_node_place(rshader); - rshader->bcode = malloc(rshader->ndw * 4); - if (rshader->bcode == NULL) - return -ENOMEM; - LIST_FOR_EACH_ENTRY(rnode, &rshader->nodes, head) { - id = rnode->cf_addr; - LIST_FOR_EACH_ENTRY(vfetch, &rnode->vfetch, head) { - r = r600_shader_vfetch_bytecode(rshader, rnode, vfetch, &id); - if (r) - return r; - } - if (rshader->r6xx_compile) - r = r6xx_shader_alu_translate(rshader, rnode, &id); - else - r = r700_shader_alu_translate(rshader, rnode, &id); - if (r) - return r; - } - id = 0; - LIST_FOR_EACH_ENTRY(rnode, &rshader->nodes, head) { - r = r700_shader_cf_node_bytecode(rshader, rnode, &id); - if (r) - return r; - } - LIST_FOR_EACH_ENTRY(v, &rshader->cshader.files[C_FILE_OUTPUT].vectors, head) { - end = 0; - if (v->head.next == &rshader->cshader.files[C_FILE_OUTPUT].vectors) - end = 1; - r = r700_shader_cf_output_bytecode(rshader, v, &id, end); - if (r) - return r; - } - LIST_FOR_EACH_ENTRY(v, &rshader->cshader.files[C_FILE_INPUT].vectors, head) { - rshader->input[rshader->ninput].gpr = rshader->ninput; - rshader->input[rshader->ninput].sid = v->sid; - rshader->input[rshader->ninput].name = v->name; - rshader->ninput++; - } - return 0; -} diff --git a/src/gallium/drivers/r600/r600_compiler_tgsi.c b/src/gallium/drivers/r600/r600_compiler_tgsi.c deleted file mode 100644 index 172cf154a3..0000000000 --- a/src/gallium/drivers/r600/r600_compiler_tgsi.c +++ /dev/null @@ -1,730 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#include -#include -#include -#include -#include -#include -#include -#include "r600_shader.h" -#include "r600_context.h" - -struct tgsi_shader { - struct c_vector **v[TGSI_FILE_COUNT]; - struct tgsi_shader_info info; - struct tgsi_parse_context parser; - const struct tgsi_token *tokens; - struct c_shader *shader; - struct c_node *node; -}; - -static unsigned tgsi_file_to_c_file(unsigned file); -static unsigned tgsi_sname_to_c_sname(unsigned sname); -static int tgsi_opcode_to_c_opcode(unsigned opcode, unsigned *copcode); - -static int tgsi_shader_init(struct tgsi_shader *ts, - const struct tgsi_token *tokens, - struct c_shader *shader) -{ - int i; - - ts->shader = shader; - ts->tokens = tokens; - tgsi_scan_shader(ts->tokens, &ts->info); - tgsi_parse_init(&ts->parser, ts->tokens); - /* initialize to NULL in case of error */ - for (i = 0; i < C_FILE_COUNT; i++) { - ts->v[i] = NULL; - } - for (i = 0; i < TGSI_FILE_COUNT; i++) { - if (ts->info.file_count[i] > 0) { - ts->v[i] = calloc(ts->info.file_count[i], sizeof(void*)); - if (ts->v[i] == NULL) { - fprintf(stderr, "%s:%d unsupported %d %d\n", __func__, __LINE__, i, ts->info.file_count[i]); - return -ENOMEM; - } - } - } - return 0; -} - -static void tgsi_shader_destroy(struct tgsi_shader *ts) -{ - int i; - - for (i = 0; i < TGSI_FILE_COUNT; i++) { - free(ts->v[i]); - } - tgsi_parse_free(&ts->parser); -} - -static int ntransform_declaration(struct tgsi_shader *ts) -{ - struct tgsi_full_declaration *fd = &ts->parser.FullToken.FullDeclaration; - struct c_vector *v; - unsigned file; - unsigned name; - int sid; - int i; - - if (fd->Declaration.Dimension) { - fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - for (i = fd->Range.First ; i <= fd->Range.Last; i++) { - sid = i; - name = C_SEMANTIC_GENERIC; - file = tgsi_file_to_c_file(fd->Declaration.File); - if (file == TGSI_FILE_NULL) { - fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - if (fd->Declaration.Semantic) { - name = tgsi_sname_to_c_sname(fd->Semantic.Name); - sid = fd->Semantic.Index; - } - v = c_shader_vector_new(ts->shader, file, name, sid); - if (v == NULL) { - fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); - return -ENOMEM; - } - ts->v[fd->Declaration.File][i] = v; - } - return 0; -} - -static int ntransform_immediate(struct tgsi_shader *ts) -{ - struct tgsi_full_immediate *fd = &ts->parser.FullToken.FullImmediate; - struct c_vector *v; - unsigned file; - unsigned name; - - if (fd->Immediate.DataType != TGSI_IMM_FLOAT32) { - fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - name = C_SEMANTIC_GENERIC; - file = C_FILE_IMMEDIATE; - v = c_shader_vector_new(ts->shader, file, name, 0); - if (v == NULL) { - fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); - return -ENOMEM; - } - v->channel[0]->value = fd->u[0].Uint; - v->channel[1]->value = fd->u[1].Uint; - v->channel[2]->value = fd->u[2].Uint; - v->channel[3]->value = fd->u[3].Uint; - ts->v[TGSI_FILE_IMMEDIATE][0] = v; - return 0; -} - -static int ntransform_instruction(struct tgsi_shader *ts) -{ - struct tgsi_full_instruction *fi = &ts->parser.FullToken.FullInstruction; - struct c_shader *shader = ts->shader; - struct c_instruction instruction; - unsigned opcode; - int i, j, r; - - if (fi->Instruction.NumDstRegs > 1) { - fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - if (fi->Instruction.Saturate) { - fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - if (fi->Instruction.Predicate) { - fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - if (fi->Instruction.Label) { - fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - if (fi->Instruction.Texture) { - fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - for (i = 0; i < fi->Instruction.NumSrcRegs; i++) { - if (fi->Src[i].Register.Indirect || - fi->Src[i].Register.Dimension || - fi->Src[i].Register.Absolute) { - fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - } - for (i = 0; i < fi->Instruction.NumDstRegs; i++) { - if (fi->Dst[i].Register.Indirect || fi->Dst[i].Register.Dimension) { - fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - } - r = tgsi_opcode_to_c_opcode(fi->Instruction.Opcode, &opcode); - if (r) { - fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); - return r; - } - if (opcode == C_OPCODE_END) { - return c_node_cfg_link(ts->node, &shader->end); - } - /* FIXME add flow instruction handling */ - memset(&instruction, 0, sizeof(struct c_instruction)); - instruction.nop = 0; - for (j = 0; j < 4; j++) { - instruction.op[instruction.nop].opcode = opcode; - instruction.op[instruction.nop].ninput = fi->Instruction.NumSrcRegs; - for (i = 0; i < fi->Instruction.NumSrcRegs; i++) { - instruction.op[instruction.nop].input[i].vector = ts->v[fi->Src[i].Register.File][fi->Src[i].Register.Index]; - switch (j) { - case 0: - instruction.op[instruction.nop].input[i].swizzle = fi->Src[i].Register.SwizzleX; - break; - case 1: - instruction.op[instruction.nop].input[i].swizzle = fi->Src[i].Register.SwizzleY; - break; - case 2: - instruction.op[instruction.nop].input[i].swizzle = fi->Src[i].Register.SwizzleZ; - break; - case 3: - instruction.op[instruction.nop].input[i].swizzle = fi->Src[i].Register.SwizzleW; - break; - default: - return -EINVAL; - } - } - instruction.op[instruction.nop].output.vector = ts->v[fi->Dst[0].Register.File][fi->Dst[0].Register.Index]; - switch (j) { - case 0: - instruction.op[instruction.nop].output.swizzle = (fi->Dst[0].Register.WriteMask & 0x1) ? C_SWIZZLE_X : C_SWIZZLE_D; - break; - case 1: - instruction.op[instruction.nop].output.swizzle = (fi->Dst[0].Register.WriteMask & 0x1) ? C_SWIZZLE_Y : C_SWIZZLE_D; - break; - case 2: - instruction.op[instruction.nop].output.swizzle = (fi->Dst[0].Register.WriteMask & 0x1) ? C_SWIZZLE_Z : C_SWIZZLE_D; - break; - case 3: - instruction.op[instruction.nop].output.swizzle = (fi->Dst[0].Register.WriteMask & 0x1) ? C_SWIZZLE_W : C_SWIZZLE_D; - break; - default: - return -EINVAL; - } - instruction.nop++; - } - return c_node_add_new_instruction(ts->node, &instruction); -} - -int c_shader_from_tgsi(struct c_shader *shader, unsigned type, - const struct tgsi_token *tokens) -{ - struct tgsi_shader ts; - int r = 0; - - c_shader_init(shader, type); - r = tgsi_shader_init(&ts, tokens, shader); - if (r) - goto out_err; - ts.shader = shader; - ts.node = &shader->entry; - while (!tgsi_parse_end_of_tokens(&ts.parser)) { - tgsi_parse_token(&ts.parser); - switch (ts.parser.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_IMMEDIATE: - r = ntransform_immediate(&ts); - if (r) - goto out_err; - break; - case TGSI_TOKEN_TYPE_DECLARATION: - r = ntransform_declaration(&ts); - if (r) - goto out_err; - break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - r = ntransform_instruction(&ts); - if (r) - goto out_err; - break; - default: - r = -EINVAL; - goto out_err; - } - } - tgsi_shader_destroy(&ts); - return 0; -out_err: - c_shader_destroy(shader); - tgsi_shader_destroy(&ts); - return r; -} - -static unsigned tgsi_file_to_c_file(unsigned file) -{ - switch (file) { - case TGSI_FILE_CONSTANT: - return C_FILE_CONSTANT; - case TGSI_FILE_INPUT: - return C_FILE_INPUT; - case TGSI_FILE_OUTPUT: - return C_FILE_OUTPUT; - case TGSI_FILE_TEMPORARY: - return C_FILE_TEMPORARY; - case TGSI_FILE_SAMPLER: - return C_FILE_SAMPLER; - case TGSI_FILE_ADDRESS: - return C_FILE_ADDRESS; - case TGSI_FILE_IMMEDIATE: - return C_FILE_IMMEDIATE; - case TGSI_FILE_PREDICATE: - return C_FILE_PREDICATE; - case TGSI_FILE_SYSTEM_VALUE: - return C_FILE_SYSTEM_VALUE; - case TGSI_FILE_NULL: - return C_FILE_NULL; - default: - fprintf(stderr, "%s:%d unsupported file %d\n", __func__, __LINE__, file); - return C_FILE_NULL; - } -} - -static unsigned tgsi_sname_to_c_sname(unsigned sname) -{ - switch (sname) { - case TGSI_SEMANTIC_POSITION: - return C_SEMANTIC_POSITION; - case TGSI_SEMANTIC_COLOR: - return C_SEMANTIC_COLOR; - case TGSI_SEMANTIC_BCOLOR: - return C_SEMANTIC_BCOLOR; - case TGSI_SEMANTIC_FOG: - return C_SEMANTIC_FOG; - case TGSI_SEMANTIC_PSIZE: - return C_SEMANTIC_PSIZE; - case TGSI_SEMANTIC_GENERIC: - return C_SEMANTIC_GENERIC; - case TGSI_SEMANTIC_NORMAL: - return C_SEMANTIC_NORMAL; - case TGSI_SEMANTIC_FACE: - return C_SEMANTIC_FACE; - case TGSI_SEMANTIC_EDGEFLAG: - return C_SEMANTIC_EDGEFLAG; - case TGSI_SEMANTIC_PRIMID: - return C_SEMANTIC_PRIMID; - case TGSI_SEMANTIC_INSTANCEID: - return C_SEMANTIC_INSTANCEID; - default: - return C_SEMANTIC_GENERIC; - } -} - -static int tgsi_opcode_to_c_opcode(unsigned opcode, unsigned *copcode) -{ - switch (opcode) { - case TGSI_OPCODE_MOV: - *copcode = C_OPCODE_MOV; - return 0; - case TGSI_OPCODE_MUL: - *copcode = C_OPCODE_MUL; - return 0; - case TGSI_OPCODE_MAD: - *copcode = C_OPCODE_MAD; - return 0; - case TGSI_OPCODE_END: - *copcode = C_OPCODE_END; - return 0; - case TGSI_OPCODE_ARL: - *copcode = C_OPCODE_ARL; - return 0; - case TGSI_OPCODE_LIT: - *copcode = C_OPCODE_LIT; - return 0; - case TGSI_OPCODE_RCP: - *copcode = C_OPCODE_RCP; - return 0; - case TGSI_OPCODE_RSQ: - *copcode = C_OPCODE_RSQ; - return 0; - case TGSI_OPCODE_EXP: - *copcode = C_OPCODE_EXP; - return 0; - case TGSI_OPCODE_LOG: - *copcode = C_OPCODE_LOG; - return 0; - case TGSI_OPCODE_ADD: - *copcode = C_OPCODE_ADD; - return 0; - case TGSI_OPCODE_DP3: - *copcode = C_OPCODE_DP3; - return 0; - case TGSI_OPCODE_DP4: - *copcode = C_OPCODE_DP4; - return 0; - case TGSI_OPCODE_DST: - *copcode = C_OPCODE_DST; - return 0; - case TGSI_OPCODE_MIN: - *copcode = C_OPCODE_MIN; - return 0; - case TGSI_OPCODE_MAX: - *copcode = C_OPCODE_MAX; - return 0; - case TGSI_OPCODE_SLT: - *copcode = C_OPCODE_SLT; - return 0; - case TGSI_OPCODE_SGE: - *copcode = C_OPCODE_SGE; - return 0; - case TGSI_OPCODE_SUB: - *copcode = C_OPCODE_SUB; - return 0; - case TGSI_OPCODE_LRP: - *copcode = C_OPCODE_LRP; - return 0; - case TGSI_OPCODE_CND: - *copcode = C_OPCODE_CND; - return 0; - case TGSI_OPCODE_DP2A: - *copcode = C_OPCODE_DP2A; - return 0; - case TGSI_OPCODE_FRC: - *copcode = C_OPCODE_FRC; - return 0; - case TGSI_OPCODE_CLAMP: - *copcode = C_OPCODE_CLAMP; - return 0; - case TGSI_OPCODE_FLR: - *copcode = C_OPCODE_FLR; - return 0; - case TGSI_OPCODE_ROUND: - *copcode = C_OPCODE_ROUND; - return 0; - case TGSI_OPCODE_EX2: - *copcode = C_OPCODE_EX2; - return 0; - case TGSI_OPCODE_LG2: - *copcode = C_OPCODE_LG2; - return 0; - case TGSI_OPCODE_POW: - *copcode = C_OPCODE_POW; - return 0; - case TGSI_OPCODE_XPD: - *copcode = C_OPCODE_XPD; - return 0; - case TGSI_OPCODE_ABS: - *copcode = C_OPCODE_ABS; - return 0; - case TGSI_OPCODE_RCC: - *copcode = C_OPCODE_RCC; - return 0; - case TGSI_OPCODE_DPH: - *copcode = C_OPCODE_DPH; - return 0; - case TGSI_OPCODE_COS: - *copcode = C_OPCODE_COS; - return 0; - case TGSI_OPCODE_DDX: - *copcode = C_OPCODE_DDX; - return 0; - case TGSI_OPCODE_DDY: - *copcode = C_OPCODE_DDY; - return 0; - case TGSI_OPCODE_KILP: - *copcode = C_OPCODE_KILP; - return 0; - case TGSI_OPCODE_PK2H: - *copcode = C_OPCODE_PK2H; - return 0; - case TGSI_OPCODE_PK2US: - *copcode = C_OPCODE_PK2US; - return 0; - case TGSI_OPCODE_PK4B: - *copcode = C_OPCODE_PK4B; - return 0; - case TGSI_OPCODE_PK4UB: - *copcode = C_OPCODE_PK4UB; - return 0; - case TGSI_OPCODE_RFL: - *copcode = C_OPCODE_RFL; - return 0; - case TGSI_OPCODE_SEQ: - *copcode = C_OPCODE_SEQ; - return 0; - case TGSI_OPCODE_SFL: - *copcode = C_OPCODE_SFL; - return 0; - case TGSI_OPCODE_SGT: - *copcode = C_OPCODE_SGT; - return 0; - case TGSI_OPCODE_SIN: - *copcode = C_OPCODE_SIN; - return 0; - case TGSI_OPCODE_SLE: - *copcode = C_OPCODE_SLE; - return 0; - case TGSI_OPCODE_SNE: - *copcode = C_OPCODE_SNE; - return 0; - case TGSI_OPCODE_STR: - *copcode = C_OPCODE_STR; - return 0; - case TGSI_OPCODE_TEX: - *copcode = C_OPCODE_TEX; - return 0; - case TGSI_OPCODE_TXD: - *copcode = C_OPCODE_TXD; - return 0; - case TGSI_OPCODE_TXP: - *copcode = C_OPCODE_TXP; - return 0; - case TGSI_OPCODE_UP2H: - *copcode = C_OPCODE_UP2H; - return 0; - case TGSI_OPCODE_UP2US: - *copcode = C_OPCODE_UP2US; - return 0; - case TGSI_OPCODE_UP4B: - *copcode = C_OPCODE_UP4B; - return 0; - case TGSI_OPCODE_UP4UB: - *copcode = C_OPCODE_UP4UB; - return 0; - case TGSI_OPCODE_X2D: - *copcode = C_OPCODE_X2D; - return 0; - case TGSI_OPCODE_ARA: - *copcode = C_OPCODE_ARA; - return 0; - case TGSI_OPCODE_ARR: - *copcode = C_OPCODE_ARR; - return 0; - case TGSI_OPCODE_BRA: - *copcode = C_OPCODE_BRA; - return 0; - case TGSI_OPCODE_CAL: - *copcode = C_OPCODE_CAL; - return 0; - case TGSI_OPCODE_RET: - *copcode = C_OPCODE_RET; - return 0; - case TGSI_OPCODE_SSG: - *copcode = C_OPCODE_SSG; - return 0; - case TGSI_OPCODE_CMP: - *copcode = C_OPCODE_CMP; - return 0; - case TGSI_OPCODE_SCS: - *copcode = C_OPCODE_SCS; - return 0; - case TGSI_OPCODE_TXB: - *copcode = C_OPCODE_TXB; - return 0; - case TGSI_OPCODE_NRM: - *copcode = C_OPCODE_NRM; - return 0; - case TGSI_OPCODE_DIV: - *copcode = C_OPCODE_DIV; - return 0; - case TGSI_OPCODE_DP2: - *copcode = C_OPCODE_DP2; - return 0; - case TGSI_OPCODE_TXL: - *copcode = C_OPCODE_TXL; - return 0; - case TGSI_OPCODE_BRK: - *copcode = C_OPCODE_BRK; - return 0; - case TGSI_OPCODE_IF: - *copcode = C_OPCODE_IF; - return 0; - case TGSI_OPCODE_ELSE: - *copcode = C_OPCODE_ELSE; - return 0; - case TGSI_OPCODE_ENDIF: - *copcode = C_OPCODE_ENDIF; - return 0; - case TGSI_OPCODE_PUSHA: - *copcode = C_OPCODE_PUSHA; - return 0; - case TGSI_OPCODE_POPA: - *copcode = C_OPCODE_POPA; - return 0; - case TGSI_OPCODE_CEIL: - *copcode = C_OPCODE_CEIL; - return 0; - case TGSI_OPCODE_I2F: - *copcode = C_OPCODE_I2F; - return 0; - case TGSI_OPCODE_NOT: - *copcode = C_OPCODE_NOT; - return 0; - case TGSI_OPCODE_TRUNC: - *copcode = C_OPCODE_TRUNC; - return 0; - case TGSI_OPCODE_SHL: - *copcode = C_OPCODE_SHL; - return 0; - case TGSI_OPCODE_AND: - *copcode = C_OPCODE_AND; - return 0; - case TGSI_OPCODE_OR: - *copcode = C_OPCODE_OR; - return 0; - case TGSI_OPCODE_MOD: - *copcode = C_OPCODE_MOD; - return 0; - case TGSI_OPCODE_XOR: - *copcode = C_OPCODE_XOR; - return 0; - case TGSI_OPCODE_SAD: - *copcode = C_OPCODE_SAD; - return 0; - case TGSI_OPCODE_TXF: - *copcode = C_OPCODE_TXF; - return 0; - case TGSI_OPCODE_TXQ: - *copcode = C_OPCODE_TXQ; - return 0; - case TGSI_OPCODE_CONT: - *copcode = C_OPCODE_CONT; - return 0; - case TGSI_OPCODE_EMIT: - *copcode = C_OPCODE_EMIT; - return 0; - case TGSI_OPCODE_ENDPRIM: - *copcode = C_OPCODE_ENDPRIM; - return 0; - case TGSI_OPCODE_BGNLOOP: - *copcode = C_OPCODE_BGNLOOP; - return 0; - case TGSI_OPCODE_BGNSUB: - *copcode = C_OPCODE_BGNSUB; - return 0; - case TGSI_OPCODE_ENDLOOP: - *copcode = C_OPCODE_ENDLOOP; - return 0; - case TGSI_OPCODE_ENDSUB: - *copcode = C_OPCODE_ENDSUB; - return 0; - case TGSI_OPCODE_NOP: - *copcode = C_OPCODE_NOP; - return 0; - case TGSI_OPCODE_NRM4: - *copcode = C_OPCODE_NRM4; - return 0; - case TGSI_OPCODE_CALLNZ: - *copcode = C_OPCODE_CALLNZ; - return 0; - case TGSI_OPCODE_IFC: - *copcode = C_OPCODE_IFC; - return 0; - case TGSI_OPCODE_BREAKC: - *copcode = C_OPCODE_BREAKC; - return 0; - case TGSI_OPCODE_KIL: - *copcode = C_OPCODE_KIL; - return 0; - case TGSI_OPCODE_F2I: - *copcode = C_OPCODE_F2I; - return 0; - case TGSI_OPCODE_IDIV: - *copcode = C_OPCODE_IDIV; - return 0; - case TGSI_OPCODE_IMAX: - *copcode = C_OPCODE_IMAX; - return 0; - case TGSI_OPCODE_IMIN: - *copcode = C_OPCODE_IMIN; - return 0; - case TGSI_OPCODE_INEG: - *copcode = C_OPCODE_INEG; - return 0; - case TGSI_OPCODE_ISGE: - *copcode = C_OPCODE_ISGE; - return 0; - case TGSI_OPCODE_ISHR: - *copcode = C_OPCODE_ISHR; - return 0; - case TGSI_OPCODE_ISLT: - *copcode = C_OPCODE_ISLT; - return 0; - case TGSI_OPCODE_F2U: - *copcode = C_OPCODE_F2U; - return 0; - case TGSI_OPCODE_U2F: - *copcode = C_OPCODE_U2F; - return 0; - case TGSI_OPCODE_UADD: - *copcode = C_OPCODE_UADD; - return 0; - case TGSI_OPCODE_UDIV: - *copcode = C_OPCODE_UDIV; - return 0; - case TGSI_OPCODE_UMAD: - *copcode = C_OPCODE_UMAD; - return 0; - case TGSI_OPCODE_UMAX: - *copcode = C_OPCODE_UMAX; - return 0; - case TGSI_OPCODE_UMIN: - *copcode = C_OPCODE_UMIN; - return 0; - case TGSI_OPCODE_UMOD: - *copcode = C_OPCODE_UMOD; - return 0; - case TGSI_OPCODE_UMUL: - *copcode = C_OPCODE_UMUL; - return 0; - case TGSI_OPCODE_USEQ: - *copcode = C_OPCODE_USEQ; - return 0; - case TGSI_OPCODE_USGE: - *copcode = C_OPCODE_USGE; - return 0; - case TGSI_OPCODE_USHR: - *copcode = C_OPCODE_USHR; - return 0; - case TGSI_OPCODE_USLT: - *copcode = C_OPCODE_USLT; - return 0; - case TGSI_OPCODE_USNE: - *copcode = C_OPCODE_USNE; - return 0; - case TGSI_OPCODE_SWITCH: - *copcode = C_OPCODE_SWITCH; - return 0; - case TGSI_OPCODE_CASE: - *copcode = C_OPCODE_CASE; - return 0; - case TGSI_OPCODE_DEFAULT: - *copcode = C_OPCODE_DEFAULT; - return 0; - case TGSI_OPCODE_ENDSWITCH: - *copcode = C_OPCODE_ENDSWITCH; - return 0; - default: - fprintf(stderr, "%s:%d unsupported opcode %d\n", __func__, __LINE__, opcode); - return -EINVAL; - } -} diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index 05575b5767..3c5195f79e 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -55,7 +55,7 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, */ if (!dc) radeon_ctx_dump_bof(rctx->ctx, "gallium.bof"); -#if 0 +#if 1 radeon_ctx_submit(rctx->ctx); #endif rctx->ctx = radeon_ctx_decref(rctx->ctx); diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index f27ff58ed4..669aaec0b2 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -40,7 +40,6 @@ struct r600_vertex_elements_state }; struct r600_pipe_shader { - unsigned type; struct r600_shader shader; struct radeon_bo *bo; struct radeon_state *state; @@ -92,8 +91,10 @@ struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *rpshader); struct r600_pipe_shader *r600_pipe_shader_create(struct pipe_context *ctx, - unsigned type, const struct tgsi_token *tokens); int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rpshader); +#define R600_ERR(fmt, args...) \ + fprintf(stderr, "EE %s/%s:%d - "fmt, __FILE__, __func__, __LINE__, ##args) + #endif diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index f7d6e10663..4a6cf40c26 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -19,40 +19,112 @@ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse */ -#include -#include -#include -#include -#include -#include +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" +#include "util/u_format.h" #include "r600_screen.h" #include "r600_context.h" +#include "r600_shader.h" +#include "r600_asm.h" +#include "r600_sq.h" #include "r600d.h" +#include +#include + +static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); + +static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader) +{ + struct r600_context *rctx = r600_context(ctx); + const struct util_format_description *desc; + enum pipe_format resource_format[160]; + unsigned i, nresources = 0; + struct r600_bc *bc = &shader->bc; + struct r600_bc_cf *cf; + struct r600_bc_vtx *vtx; + + if (shader->processor_type != TGSI_PROCESSOR_VERTEX) + return 0; + for (i = 0; i < rctx->vertex_elements->count; i++) { + resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format; + } + LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { + switch (cf->inst) { + case V_SQ_CF_WORD1_SQ_CF_INST_VTX: + case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: + LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { + desc = util_format_description(resource_format[vtx->buffer_id]); + if (desc == NULL) { + R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]); + return -EINVAL; + } + vtx->dst_sel_x = desc->swizzle[0]; + vtx->dst_sel_y = desc->swizzle[1]; + vtx->dst_sel_z = desc->swizzle[2]; + vtx->dst_sel_w = desc->swizzle[3]; + } + break; + default: + break; + } + } + return r600_bc_build(&shader->bc); +} + +struct r600_pipe_shader *r600_pipe_shader_create(struct pipe_context *ctx, + const struct tgsi_token *tokens) +{ + struct r600_screen *rscreen = r600_screen(ctx->screen); + struct r600_pipe_shader *rpshader = CALLOC_STRUCT(r600_pipe_shader); + int r; + +fprintf(stderr, "--------------------------------------------------------------\n"); +tgsi_dump(tokens, 0); + if (rpshader == NULL) + return NULL; + rpshader->shader.family = radeon_get_family(rscreen->rw); + r = r600_shader_from_tgsi(tokens, &rpshader->shader); + if (r) { + R600_ERR("translation from TGSI failed !\n"); + goto out_err; + } + r = r600_bc_build(&rpshader->shader.bc); + if (r) { + R600_ERR("building bytecode failed !\n"); + goto out_err; + } +fprintf(stderr, "______________________________________________________________\n"); + return rpshader; +out_err: + free(rpshader); + return NULL; +} static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *rpshader) { struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_shader *rshader = &rpshader->shader; struct radeon_state *state; - unsigned i, tmp; + unsigned i, j, tmp; rpshader->state = radeon_state_decref(rpshader->state); state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER); if (state == NULL) return -ENOMEM; - for (i = 0; i < rshader->noutput; i += 4) { - tmp = rshader->output[i].sid; - tmp |= rshader->output[i + 1].sid << 8; - tmp |= rshader->output[i + 2].sid << 16; - tmp |= rshader->output[i + 3].sid << 24; - state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] = tmp; - } - state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 1); - state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->ngpr); + for (i = 0; i < 10; i++) { + state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0; + } + for (i = 0, j = 0; i < rshader->noutput; i++) { + if (rshader->output[i].name != TGSI_SEMANTIC_POSITION) { + tmp = rshader->output[i].sid << ((j & 3) * 8); + state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + j / 4] |= tmp; + j++; + } + } + state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); + state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr); rpshader->state = state; rpshader->state->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); rpshader->state->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo); @@ -81,7 +153,7 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) | S_0286CC_PERSP_GRADIENT_ENA(1); state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; - state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->ngpr); + state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr); state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = 0x00000002; rpshader->state = state; rpshader->state->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); @@ -100,21 +172,21 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *r /* copy new shader */ radeon_bo_decref(rscreen->rw, rpshader->bo); rpshader->bo = NULL; - rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->ndw * 4, + rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4, 4096, NULL); if (rpshader->bo == NULL) { return -ENOMEM; } radeon_bo_map(rscreen->rw, rpshader->bo); - memcpy(rpshader->bo->data, rshader->bcode, rshader->ndw * 4); + memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4); radeon_bo_unmap(rscreen->rw, rpshader->bo); /* build state */ rshader->flat_shade = rctx->flat_shade; - switch (rpshader->type) { - case C_PROGRAM_TYPE_VS: + switch (rshader->processor_type) { + case TGSI_PROCESSOR_VERTEX: r = r600_pipe_shader_vs(ctx, rpshader); break; - case C_PROGRAM_TYPE_FS: + case TGSI_PROCESSOR_FRAGMENT: r = r600_pipe_shader_ps(ctx, rpshader); break; default: @@ -124,104 +196,610 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *r return r; } -struct r600_pipe_shader *r600_pipe_shader_create(struct pipe_context *ctx, unsigned type, const struct tgsi_token *tokens) +int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rpshader) { - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_pipe_shader *rpshader = CALLOC_STRUCT(r600_pipe_shader); - struct r600_shader *rshader = &rpshader->shader; + struct r600_context *rctx = r600_context(ctx); int r; - enum radeon_family family; if (rpshader == NULL) - return NULL; - rpshader->type = type; - family = radeon_get_family(rscreen->rw); - rshader->r6xx_compile = (family >= CHIP_R600 && family < CHIP_RV770); - LIST_INITHEAD(&rshader->nodes); - fprintf(stderr, "<< %s\n", rshader->r6xx_compile ? "R600" : "R700"); - tgsi_dump(tokens, 0); - fprintf(stderr, "--------------------------------------------------------------\n"); - r = c_shader_from_tgsi(&rshader->cshader, type, tokens); - if (r) { - r600_pipe_shader_destroy(ctx, rpshader); - fprintf(stderr, "ERROR(%s %d)>>\n\n", __func__, __LINE__); - return NULL; + return -EINVAL; + /* there should be enough input */ + if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) { + R600_ERR("%d resources provided, expecting %d\n", + rctx->vertex_elements->count, rpshader->shader.bc.nresource); + return -EINVAL; } - r = r600_shader_insert_fetch(&rshader->cshader); - if (r) { - r600_pipe_shader_destroy(ctx, rpshader); - fprintf(stderr, "ERROR(%s %d)>>\n\n", __func__, __LINE__); - return NULL; + r = r600_shader_update(ctx, &rpshader->shader); + if (r) + return r; + return r600_pipe_shader(ctx, rpshader); +} + +struct r600_shader_tgsi_instruction; + +struct r600_shader_ctx { + struct tgsi_shader_info info; + struct tgsi_parse_context parse; + const struct tgsi_token *tokens; + unsigned type; + unsigned file_offset[TGSI_FILE_COUNT]; + unsigned temp_reg; + struct r600_shader_tgsi_instruction *inst_info; + struct r600_bc *bc; + struct r600_shader *shader; +}; + +struct r600_shader_tgsi_instruction { + unsigned tgsi_opcode; + unsigned is_op3; + unsigned r600_opcode; + int (*process)(struct r600_shader_ctx *ctx); +}; + +static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]; + +static int tgsi_is_supported(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; + int j; + + if (i->Instruction.NumDstRegs > 1) { + R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); + return -EINVAL; } - r = c_shader_build_dominator_tree(&rshader->cshader); - if (r) { - r600_pipe_shader_destroy(ctx, rpshader); - fprintf(stderr, "ERROR(%s %d)>>\n\n", __func__, __LINE__); - return NULL; + if (i->Instruction.Saturate) { + R600_ERR("staturate unsupported\n"); + return -EINVAL; } - r = r600_cshader_legalize(&rshader->cshader); - if (r) { - r600_pipe_shader_destroy(ctx, rpshader); - fprintf(stderr, "ERROR(%s %d)>>\n\n", __func__, __LINE__); - return NULL; + if (i->Instruction.Predicate) { + R600_ERR("predicate unsupported\n"); + return -EINVAL; } - c_shader_dump(&rshader->cshader); - r = r700_shader_translate(rshader); - if (r) { - r600_pipe_shader_destroy(ctx, rpshader); - fprintf(stderr, "ERROR(%s %d)>>\n\n", __func__, __LINE__); - return NULL; + if (i->Instruction.Label) { + R600_ERR("label unsupported\n"); + return -EINVAL; } -#if 1 -#if 0 - fprintf(stderr, "--------------------------------------------------------------\n"); - for (int i = 0; i < rshader->ndw; i++) { - fprintf(stderr, "0x%08X\n", rshader->bcode[i]); + if (i->Instruction.Texture) { + R600_ERR("texture unsupported\n"); + return -EINVAL; } -#endif - fprintf(stderr, ">>\n\n"); -#endif - return rpshader; + for (j = 0; j < i->Instruction.NumSrcRegs; j++) { + if (i->Src[j].Register.Indirect || + i->Src[j].Register.Dimension || + i->Src[j].Register.Absolute) { + R600_ERR("unsupported src (indirect|dimension|absolute)\n"); + return -EINVAL; + } + } + for (j = 0; j < i->Instruction.NumDstRegs; j++) { + if (i->Dst[j].Register.Indirect || i->Dst[j].Register.Dimension) { + R600_ERR("unsupported dst (indirect|dimension)\n"); + return -EINVAL; + } + } + return 0; } -void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *rpshader) +static int tgsi_declaration(struct r600_shader_ctx *ctx) { - struct r600_screen *rscreen = r600_screen(ctx->screen); + struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; + struct r600_bc_vtx vtx; + unsigned i; + int r; - if (rpshader == NULL) - return; - radeon_bo_decref(rscreen->rw, rpshader->bo); - rpshader->bo = NULL; - r600_shader_cleanup(&rpshader->shader); - FREE(rpshader); + switch (d->Declaration.File) { + case TGSI_FILE_INPUT: + i = ctx->shader->ninput++; + ctx->shader->input[i].name = d->Semantic.Name; + ctx->shader->input[i].sid = d->Semantic.Index; + ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; + if (ctx->type == TGSI_PROCESSOR_VERTEX) { + /* turn input into fetch */ + memset(&vtx, 0, sizeof(struct r600_bc_vtx)); + vtx.inst = 0; + vtx.fetch_type = 0; + vtx.buffer_id = i; + /* register containing the index into the buffer */ + vtx.src_gpr = 0; + vtx.src_sel_x = 0; + vtx.mega_fetch_count = 0x1F; + vtx.dst_gpr = ctx->shader->input[i].gpr; + vtx.dst_sel_x = 0; + vtx.dst_sel_y = 1; + vtx.dst_sel_z = 2; + vtx.dst_sel_w = 3; + r = r600_bc_add_vtx(ctx->bc, &vtx); + if (r) + return r; + } + break; + case TGSI_FILE_OUTPUT: + i = ctx->shader->noutput++; + ctx->shader->output[i].name = d->Semantic.Name; + ctx->shader->output[i].sid = d->Semantic.Index; + ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; + break; + case TGSI_FILE_CONSTANT: + case TGSI_FILE_TEMPORARY: + break; + default: + R600_ERR("unsupported file %d declaration\n", d->Declaration.File); + return -EINVAL; + } + return 0; } -int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rpshader) +int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) { - struct r600_context *rctx = r600_context(ctx); - struct r600_shader *rshader; - enum pipe_format resource_format[160]; - unsigned i, nresources = 0; - int r; + struct tgsi_full_immediate *immediate; + struct r600_shader_ctx ctx; + struct r600_bc_output output; + unsigned opcode; + int i, r = 0, pos0; + u32 value[4]; - if (rpshader == NULL) - return -EINVAL; - rshader = &rpshader->shader; - switch (rpshader->type) { - case C_PROGRAM_TYPE_VS: - for (i = 0; i < rctx->vertex_elements->count; i++) { - resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format; + ctx.bc = &shader->bc; + ctx.shader = shader; + r = r600_bc_init(ctx.bc, shader->family); + if (r) + return r; + ctx.tokens = tokens; + tgsi_scan_shader(tokens, &ctx.info); + tgsi_parse_init(&ctx.parse, tokens); + ctx.type = ctx.parse.FullHeader.Processor.Processor; + shader->processor_type = ctx.type; + + /* register allocations */ + /* Values [0,127] correspond to GPR[0..127]. + * Values [256,511] correspond to cfile constants c[0..255]. + * Other special values are shown in the list below. + * 248 SQ_ALU_SRC_0: special constant 0.0. + * 249 SQ_ALU_SRC_1: special constant 1.0 float. + * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. + * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. + * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. + * 253 SQ_ALU_SRC_LITERAL: literal constant. + * 254 SQ_ALU_SRC_PV: previous vector result. + * 255 SQ_ALU_SRC_PS: previous scalar result. + */ + for (i = 0; i < TGSI_FILE_COUNT; i++) { + ctx.file_offset[i] = 0; + } + if (ctx.type == TGSI_PROCESSOR_VERTEX) { + ctx.file_offset[TGSI_FILE_INPUT] = 1; + } + ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + + ctx.info.file_count[TGSI_FILE_INPUT]; + ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + + ctx.info.file_count[TGSI_FILE_OUTPUT]; + ctx.file_offset[TGSI_FILE_CONSTANT] = 256; + ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253; + ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + + ctx.info.file_count[TGSI_FILE_TEMPORARY]; + + while (!tgsi_parse_end_of_tokens(&ctx.parse)) { + tgsi_parse_token(&ctx.parse); + switch (ctx.parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: +// R600_ERR("TGSI_TOKEN_TYPE_IMMEDIATE unsupported\n"); + immediate = &ctx.parse.FullToken.FullImmediate; + value[0] = immediate->u[0].Uint; + value[1] = immediate->u[1].Uint; + value[2] = immediate->u[2].Uint; + value[3] = immediate->u[3].Uint; + break; + case TGSI_TOKEN_TYPE_DECLARATION: + r = tgsi_declaration(&ctx); + if (r) + goto out_err; + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + r = tgsi_is_supported(&ctx); + if (r) + goto out_err; + opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; + ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; + r = ctx.inst_info->process(&ctx); + if (r) + goto out_err; + r = r600_bc_add_literal(ctx.bc, value); + if (r) + goto out_err; + break; + default: + R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); + r = -EINVAL; + goto out_err; + } + } + /* export output */ + for (i = 0, pos0 = 0; i < shader->noutput; i++) { + memset(&output, 0, sizeof(struct r600_bc_output)); + output.gpr = shader->output[i].gpr; + output.elem_size = 3; + output.swizzle_x = 0; + output.swizzle_y = 1; + output.swizzle_z = 2; + output.swizzle_w = 3; + output.barrier = 1; + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; + output.array_base = i - pos0; + output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE; + switch (ctx.type == TGSI_PROCESSOR_VERTEX) { + case TGSI_PROCESSOR_VERTEX: + if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { + output.array_base = 60; + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + /* position doesn't count in array_base */ + pos0 = 1; + } + break; + case TGSI_PROCESSOR_FRAGMENT: + if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { + output.array_base = 0; + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + } else { + R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); + r = -EINVAL; + goto out_err; + } + break; + default: + R600_ERR("unsupported processor type %d\n", ctx.type); + r = -EINVAL; + goto out_err; } + if (i == (shader->noutput - 1)) { + output.end_of_program = 1; + } + r = r600_bc_add_output(ctx.bc, &output); + if (r) + goto out_err; + } + tgsi_parse_free(&ctx.parse); + return 0; +out_err: + tgsi_parse_free(&ctx.parse); + return r; +} + +static int tgsi_unsupported(struct r600_shader_ctx *ctx) +{ + R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode); + return -EINVAL; +} + +static int tgsi_end(struct r600_shader_ctx *ctx) +{ + return 0; +} + +static int tgsi_src(struct r600_shader_ctx *ctx, + const struct tgsi_full_src_register *tgsi_src, + unsigned swizzle, + struct r600_bc_alu_src *r600_src) +{ + r600_src->sel = tgsi_src->Register.Index; + if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { + r600_src->sel = 0; + } + r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; + switch (swizzle) { + case 0: + r600_src->chan = tgsi_src->Register.SwizzleX; break; - default: + case 1: + r600_src->chan = tgsi_src->Register.SwizzleY; break; - } - /* there should be enough input */ - if (nresources < rshader->nresource) + case 2: + r600_src->chan = tgsi_src->Register.SwizzleZ; + break; + case 3: + r600_src->chan = tgsi_src->Register.SwizzleW; + break; + default: return -EINVAL; - /* FIXME compare resources */ - r = r600_shader_update(rshader, resource_format); - if (r) - return r; - return r600_pipe_shader(ctx, rpshader); + } + return 0; +} + +static int tgsi_dst(struct r600_shader_ctx *ctx, + const struct tgsi_full_dst_register *tgsi_dst, + unsigned swizzle, + struct r600_bc_alu_dst *r600_dst) +{ + r600_dst->sel = tgsi_dst->Register.Index; + r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; + r600_dst->chan = swizzle; + r600_dst->write = 1; + return 0; +} + +static int tgsi_op2(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, r; + + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; + } else { + alu.inst = ctx->inst_info->r600_opcode; + for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { + r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]); + if (r) + return r; + } + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + } + /* handle some special cases */ + switch (ctx->inst_info->tgsi_opcode) { + case TGSI_OPCODE_SUB: + alu.src[1].neg = 1; + break; + case TGSI_OPCODE_DP2: + if (i > 1) { + alu.src[0].sel = alu.src[1].sel = 248; + alu.src[0].chan = alu.src[1].chan = 0; + } + break; + case TGSI_OPCODE_DP3: + if (i > 2) { + alu.src[0].sel = alu.src[1].sel = 248; + alu.src[0].chan = alu.src[1].chan = 0; + } + break; + default: + break; + } + if (i == 3) { + alu.last = 1; + } + alu.nliteral = 0; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + +static int tgsi_slt(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, r; + + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; + } else { + alu.inst = ctx->inst_info->r600_opcode; + r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]); + if (r) + return r; + r = tgsi_src(ctx, &inst->Src[1], i, &alu.src[0]); + if (r) + return r; + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + } + if (i == 3) { + alu.last = 1; + } + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; } + +static int tgsi_op3(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, r; + + /* do it in 2 step as op3 doesn't support writemask */ + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = ctx->inst_info->r600_opcode; + for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { + r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]); + if (r) + return r; + } + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + alu.is_op3 = 1; + if (i == 3) { + alu.last = 1; + } + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; + } else { + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = i; + } + if (i == 3) { + alu.last = 1; + } + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + +static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { + {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, + {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, + {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, + {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_op2}, + {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_op2}, + {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, + {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_slt}, + {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, + {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, + {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DDX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DDY, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* predicated kill */ + {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TEX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* SGN */ + {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_op2}, + {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BRK, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CONT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* conditional kill */ + {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ + /* gap */ + {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, +}; diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 40064ba8a9..23b6a83b9a 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -23,243 +23,23 @@ #ifndef R600_SHADER_H #define R600_SHADER_H -#include "r600_compiler.h" -#include "radeon.h" - -struct r600_shader_operand { - struct c_vector *vector; - unsigned sel; - unsigned chan; - unsigned neg; - unsigned abs; -}; - -struct r600_shader_vfetch { - struct list_head head; - unsigned cf_addr; - struct r600_shader_operand src[2]; - struct r600_shader_operand dst[4]; -}; - -struct r600_shader_inst { - unsigned is_op3; - unsigned opcode; - unsigned inst; - struct r600_shader_operand src[3]; - struct r600_shader_operand dst; - unsigned last; -}; - -struct r600_shader_alu { - struct list_head head; - unsigned nalu; - unsigned nliteral; - unsigned nconstant; - struct r600_shader_inst alu[5]; - u32 literal[4]; -}; - -struct r600_shader_node { - struct list_head head; - unsigned cf_id; /**< cf index (in dw) in byte code */ - unsigned cf_addr; /**< instructions index (in dw) in byte code */ - unsigned nslot; /**< number of slot (2 dw) needed by this node */ - unsigned nfetch; - struct c_node *node; /**< compiler node from which this node originate */ - struct list_head vfetch; /**< list of vfetch instructions */ - struct list_head alu; /**< list of alu instructions */ -}; +#include "r600_asm.h" struct r600_shader_io { - unsigned name; - unsigned gpr; - int sid; + unsigned name; + unsigned gpr; + int sid; }; struct r600_shader { - unsigned stack_size; /**< stack size needed by this shader */ - unsigned ngpr; /**< number of GPR needed by this shader */ - unsigned nconstant; /**< number of constants used by this shader */ - unsigned nresource; /**< number of resources used by this shader */ - unsigned noutput; - unsigned ninput; - unsigned nvector; - unsigned ncf; /**< total number of cf clauses */ - unsigned nslot; /**< total number of slots (2 dw) */ - unsigned flat_shade; /**< are we flat shading */ - struct list_head nodes; /**< list of node */ - struct r600_shader_io input[32]; - struct r600_shader_io output[32]; - /* TODO replace GPR by some better register allocator */ - struct c_vector **gpr; - unsigned ndw; /**< bytes code size in dw */ - u32 *bcode; /**< bytes code */ - enum pipe_format resource_format[160]; /**< format of resource */ - struct c_shader cshader; - boolean r6xx_compile; + unsigned processor_type; + struct r600_bc bc; + boolean flat_shade; + unsigned ninput; + unsigned noutput; + struct r600_shader_io input[32]; + struct r600_shader_io output[32]; + enum radeon_family family; }; -void r600_shader_cleanup(struct r600_shader *rshader); -int r600_shader_register(struct r600_shader *rshader); -int r600_shader_node(struct r600_shader *shader); -void r600_shader_node_place(struct r600_shader *rshader); -int r600_shader_find_gpr(struct r600_shader *rshader, struct c_vector *v, unsigned swizzle, - struct r600_shader_operand *operand); -int r600_shader_vfetch_bytecode(struct r600_shader *rshader, - struct r600_shader_node *rnode, - struct r600_shader_vfetch *vfetch, - unsigned *cid); -int r600_shader_update(struct r600_shader *rshader, - enum pipe_format *resource_format); -int r600_shader_legalize(struct r600_shader *rshader); -int r600_cshader_legalize(struct c_shader *shader); - -int r700_shader_translate(struct r600_shader *rshader); - -int c_shader_from_tgsi(struct c_shader *shader, unsigned type, - const struct tgsi_token *tokens); -int r600_shader_register(struct r600_shader *rshader); -int r600_shader_translate_rec(struct r600_shader *rshader, struct c_node *node); -int r700_shader_translate(struct r600_shader *rshader); -int r600_shader_insert_fetch(struct c_shader *shader); - -int r6xx_shader_alu_translate(struct r600_shader *rshader, - struct r600_shader_node *rnode, - unsigned *cid); - -enum r600_instruction { - INST_ADD = 0, - INST_MUL = 1, - INST_MUL_IEEE = 2, - INST_MAX = 3, - INST_MIN = 4, - INST_MAX_DX10 = 5, - INST_MIN_DX10 = 6, - INST_SETE = 7, - INST_SETGT = 8, - INST_SETGE = 9, - INST_SETNE = 10, - INST_SETE_DX10 = 11, - INST_SETGT_DX10 = 12, - INST_SETGE_DX10 = 13, - INST_SETNE_DX10 = 14, - INST_FRACT = 15, - INST_TRUNC = 16, - INST_CEIL = 17, - INST_RNDNE = 18, - INST_FLOOR = 19, - INST_MOVA = 20, - INST_MOVA_FLOOR = 21, - INST_MOVA_INT = 22, - INST_MOV = 23, - INST_NOP = 24, - INST_PRED_SETGT_UINT = 25, - INST_PRED_SETGE_UINT = 26, - INST_PRED_SETE = 27, - INST_PRED_SETGT = 28, - INST_PRED_SETGE = 29, - INST_PRED_SETNE = 30, - INST_PRED_SET_INV = 31, - INST_PRED_SET_POP = 32, - INST_PRED_SET_CLR = 33, - INST_PRED_SET_RESTORE = 34, - INST_PRED_SETE_PUSH = 35, - INST_PRED_SETGT_PUSH = 36, - INST_PRED_SETGE_PUSH = 37, - INST_PRED_SETNE_PUSH = 38, - INST_KILLE = 39, - INST_KILLGT = 40, - INST_KILLGE = 41, - INST_KILLNE = 42, - INST_AND_INT = 43, - INST_OR_INT = 44, - INST_XOR_INT = 45, - INST_NOT_INT = 46, - INST_ADD_INT = 47, - INST_SUB_INT = 48, - INST_MAX_INT = 49, - INST_MIN_INT = 50, - INST_MAX_UINT = 51, - INST_MIN_UINT = 52, - INST_SETE_INT = 53, - INST_SETGT_INT = 54, - INST_SETGE_INT = 55, - INST_SETNE_INT = 56, - INST_SETGT_UINT = 57, - INST_SETGE_UINT = 58, - INST_KILLGT_UINT = 59, - INST_KILLGE_UINT = 60, - INST_PRED_SETE_INT = 61, - INST_PRED_SETGT_INT = 62, - INST_PRED_SETGE_INT = 63, - INST_PRED_SETNE_INT = 64, - INST_KILLE_INT = 65, - INST_KILLGT_INT = 66, - INST_KILLGE_INT = 67, - INST_KILLNE_INT = 68, - INST_PRED_SETE_PUSH_INT = 69, - INST_PRED_SETGT_PUSH_INT = 70, - INST_PRED_SETGE_PUSH_INT = 71, - INST_PRED_SETNE_PUSH_INT = 72, - INST_PRED_SETLT_PUSH_INT = 73, - INST_PRED_SETLE_PUSH_INT = 74, - INST_DOT4 = 75, - INST_DOT4_IEEE = 76, - INST_CUBE = 77, - INST_MAX4 = 78, - INST_MOVA_GPR_INT = 79, - INST_EXP_IEEE = 80, - INST_LOG_CLAMPED = 81, - INST_LOG_IEEE = 82, - INST_RECIP_CLAMPED = 83, - INST_RECIP_FF = 84, - INST_RECIP_IEEE = 85, - INST_RECIPSQRT_CLAMPED = 86, - INST_RECIPSQRT_FF = 87, - INST_RECIPSQRT_IEEE = 88, - INST_SQRT_IEEE = 89, - INST_FLT_TO_INT = 90, - INST_INT_TO_FLT = 91, - INST_UINT_TO_FLT = 92, - INST_SIN = 93, - INST_COS = 94, - INST_ASHR_INT = 95, - INST_LSHR_INT = 96, - INST_LSHL_INT = 97, - INST_MULLO_INT = 98, - INST_MULHI_INT = 99, - INST_MULLO_UINT = 100, - INST_MULHI_UINT = 101, - INST_RECIP_INT = 102, - INST_RECIP_UINT = 103, - INST_FLT_TO_UINT = 104, - INST_MUL_LIT = 105, - INST_MUL_LIT_M2 = 106, - INST_MUL_LIT_M4 = 107, - INST_MUL_LIT_D2 = 108, - INST_MULADD = 109, - INST_MULADD_M2 = 110, - INST_MULADD_M4 = 111, - INST_MULADD_D2 = 112, - INST_MULADD_IEEE = 113, - INST_MULADD_IEEE_M2 = 114, - INST_MULADD_IEEE_M4 = 115, - INST_MULADD_IEEE_D2 = 116, - INST_CNDE = 117, - INST_CNDGT = 118, - INST_CNDGE = 119, - INST_CNDE_INT = 120, - INST_CNDGT_INT = 121, - INST_CNDGE_INT = 122, - INST_COUNT -}; - -struct r600_instruction_info { - enum r600_instruction instruction; - unsigned opcode; - unsigned is_trans; - unsigned is_op3; -}; - - #endif diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h index 447ba98f00..4770ab0bf7 100644 --- a/src/gallium/drivers/r600/r600_sq.h +++ b/src/gallium/drivers/r600/r600_sq.h @@ -87,9 +87,9 @@ #define G_SQ_CF_WORD1_BARRIER(x) (((x) >> 31) & 0x1) #define C_SQ_CF_WORD1_BARRIER 0x7FFFFFFF #define P_SQ_CF_ALU_WORD0 -#define S_SQ_CF_ALU_WORD0_ALU_ADDR(x) (((x) & 0x3FFFFF) << 0) -#define G_SQ_CF_ALU_WORD0_ALU_ADDR(x) (((x) >> 0) & 0x3FFFFF) -#define C_SQ_CF_ALU_WORD0_ALU_ADDR 0xFFC00000 +#define S_SQ_CF_ALU_WORD0_ADDR(x) (((x) & 0x3FFFFF) << 0) +#define G_SQ_CF_ALU_WORD0_ADDR(x) (((x) >> 0) & 0x3FFFFF) +#define C_SQ_CF_ALU_WORD0_ADDR 0xFFC00000 #define S_SQ_CF_ALU_WORD0_KCACHE_BANK0(x) (((x) & 0xF) << 22) #define G_SQ_CF_ALU_WORD0_KCACHE_BANK0(x) (((x) >> 22) & 0xF) #define C_SQ_CF_ALU_WORD0_KCACHE_BANK0 0xFC3FFFFF @@ -109,15 +109,15 @@ #define S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(x) (((x) & 0xFF) << 10) #define G_SQ_CF_ALU_WORD1_KCACHE_ADDR1(x) (((x) >> 10) & 0xFF) #define C_SQ_CF_ALU_WORD1_KCACHE_ADDR1 0xFFFC03FF -#define S_SQ_CF_ALU_WORD1_ALU_COUNT(x) (((x) & 0x7F) << 18) -#define G_SQ_CF_ALU_WORD1_ALU_COUNT(x) (((x) >> 18) & 0x7F) -#define C_SQ_CF_ALU_WORD1_ALU_COUNT 0xFE03FFFF +#define S_SQ_CF_ALU_WORD1_COUNT(x) (((x) & 0x7F) << 18) +#define G_SQ_CF_ALU_WORD1_COUNT(x) (((x) >> 18) & 0x7F) +#define C_SQ_CF_ALU_WORD1_COUNT 0xFE03FFFF #define S_SQ_CF_ALU_WORD1_USES_WATERFALL(x) (((x) & 0x1) << 25) #define G_SQ_CF_ALU_WORD1_USES_WATERFALL(x) (((x) >> 25) & 0x1) #define C_SQ_CF_ALU_WORD1_USES_WATERFALL 0xFDFFFFFF -#define S_SQ_CF_ALU_WORD1_CF_ALU_INST(x) (((x) & 0xF) << 26) -#define G_SQ_CF_ALU_WORD1_CF_ALU_INST(x) (((x) >> 26) & 0xF) -#define C_SQ_CF_ALU_WORD1_CF_ALU_INST 0xC3FFFFFF +#define S_SQ_CF_ALU_WORD1_CF_INST(x) (((x) & 0xF) << 26) +#define G_SQ_CF_ALU_WORD1_CF_INST(x) (((x) >> 26) & 0xF) +#define C_SQ_CF_ALU_WORD1_CF_INST 0xC3FFFFFF #define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU 0x00000008 #define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE 0x00000009 #define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER 0x0000000A diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 4150f88785..84a13e4ef7 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -151,7 +151,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, static void *r600_create_fs_state(struct pipe_context *ctx, const struct pipe_shader_state *shader) { - return r600_pipe_shader_create(ctx, C_PROGRAM_TYPE_FS, shader->tokens); + return r600_pipe_shader_create(ctx, shader->tokens); } static void r600_bind_fs_state(struct pipe_context *ctx, void *state) @@ -164,7 +164,7 @@ static void r600_bind_fs_state(struct pipe_context *ctx, void *state) static void *r600_create_vs_state(struct pipe_context *ctx, const struct pipe_shader_state *shader) { - return r600_pipe_shader_create(ctx, C_PROGRAM_TYPE_VS, shader->tokens); + return r600_pipe_shader_create(ctx, shader->tokens); } static void r600_bind_vs_state(struct pipe_context *ctx, void *state) diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c new file mode 100644 index 0000000000..3532ba5b0c --- /dev/null +++ b/src/gallium/drivers/r600/r700_asm.c @@ -0,0 +1,70 @@ +/* + * Copyright 2010 Jerome Glisse + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "r600_asm.h" +#include "r600_context.h" +#include "util/u_memory.h" +#include "r700_sq.h" +#include +#include + +int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) +{ + unsigned i; + + /* don't replace gpr by pv or ps for destination register */ + if (alu->is_op3) { + bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | + S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | + S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | + S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | + S_SQ_ALU_WORD0_LAST(alu->last); + bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | + S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | + S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) | + S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) | + S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) | + S_SQ_ALU_WORD1_OP3_ALU_INST(alu->inst) | + S_SQ_ALU_WORD1_BANK_SWIZZLE(0); + } else { + bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | + S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | + S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) | + S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | + S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | + S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) | + S_SQ_ALU_WORD0_LAST(alu->last); + bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | + S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | + S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | + S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | + S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | + S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) | + S_SQ_ALU_WORD1_BANK_SWIZZLE(0); + } + if (alu->last) { + for (i = 0; i < alu->nliteral; i++) { + bc->bytecode[id++] = alu->value[i]; + } + } + return 0; +} -- cgit v1.2.3 From 5cc2974dff346f3fa53881dbcc158e4563915487 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 23 Jul 2010 17:49:26 -0400 Subject: r600g: add RSQ token support Could serve as an example on how to add more token support. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_shader.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 4a6cf40c26..e983cc90b4 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -559,7 +559,6 @@ static int tgsi_op2(struct r600_shader_ctx *ctx) if (i == 3) { alu.last = 1; } - alu.nliteral = 0; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; @@ -599,6 +598,33 @@ static int tgsi_slt(struct r600_shader_ctx *ctx) return 0; } +static int tgsi_trans(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, r; + + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + if (inst->Dst[0].Register.WriteMask & (1 << i)) { + alu.inst = ctx->inst_info->r600_opcode; + for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { + r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]); + if (r) + return r; + } + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } + return 0; +} + static int tgsi_op3(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -651,7 +677,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans}, {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, -- cgit v1.2.3 From cf864fd58b2a4780482a108cd3ff86779e8fa965 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 23 Jul 2010 18:19:13 -0400 Subject: r600g: fix dp2, dp3, dp4 tokens We need to make sure dp are all mirror accross the alu unit. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_shader.c | 87 +++++++++++++++++++++++++--------- 1 file changed, 64 insertions(+), 23 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index e983cc90b4..d788ab88be 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -541,18 +541,6 @@ static int tgsi_op2(struct r600_shader_ctx *ctx) case TGSI_OPCODE_SUB: alu.src[1].neg = 1; break; - case TGSI_OPCODE_DP2: - if (i > 1) { - alu.src[0].sel = alu.src[1].sel = 248; - alu.src[0].chan = alu.src[1].chan = 0; - } - break; - case TGSI_OPCODE_DP3: - if (i > 2) { - alu.src[0].sel = alu.src[1].sel = 248; - alu.src[0].chan = alu.src[1].chan = 0; - } - break; default: break; } @@ -625,6 +613,33 @@ static int tgsi_trans(struct r600_shader_ctx *ctx) return 0; } +static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) +{ + struct r600_bc_alu alu; + int i, r; + + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; + } else { + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = i; + } + if (i == 3) { + alu.last = 1; + } + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + static int tgsi_op3(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -642,6 +657,7 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) } alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; + alu.dst.write = 1; alu.is_op3 = 1; if (i == 3) { alu.last = 1; @@ -650,17 +666,42 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) if (r) return r; } + return tgsi_helper_copy(ctx, inst); +} + +static int tgsi_dp(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, r; + for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); - if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; - } else { - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + alu.inst = ctx->inst_info->r600_opcode; + for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { + r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]); if (r) return r; - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = i; + } + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + alu.dst.write = 1; + /* handle some special cases */ + switch (ctx->inst_info->tgsi_opcode) { + case TGSI_OPCODE_DP2: + if (i > 1) { + alu.src[0].sel = alu.src[1].sel = 248; + alu.src[0].chan = alu.src[1].chan = 0; + } + break; + case TGSI_OPCODE_DP3: + if (i > 2) { + alu.src[0].sel = alu.src[1].sel = 248; + alu.src[0].chan = alu.src[1].chan = 0; + } + break; + default: + break; } if (i == 3) { alu.last = 1; @@ -669,7 +710,7 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) if (r) return r; } - return 0; + return tgsi_helper_copy(ctx, inst); } static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { @@ -682,8 +723,8 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, - {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_op2}, - {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_op2}, + {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, + {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, @@ -747,7 +788,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_TXB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_op2}, + {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_BRK, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, -- cgit v1.2.3 From bd27db400a433d842b5ede862e14136a1e2d8119 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Fri, 23 Jul 2010 17:50:35 -0700 Subject: r600g: Fix SCons build. --- src/gallium/drivers/r600/SConscript | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript index 26e2f1941c..99c8644e02 100644 --- a/src/gallium/drivers/r600/SConscript +++ b/src/gallium/drivers/r600/SConscript @@ -27,11 +27,8 @@ r600 = env.ConvenienceLibrary( 'r600_state.c', 'r600_texture.c', 'r600_shader.c', - 'r600_compiler.c', - 'r600_compiler_tgsi.c', - 'r600_compiler_dump.c', - 'r600_compiler_r600.c', - 'r600_compiler_r700.c' + 'r600_asm.c', + 'r700_asm.c', ]) Export('r600') -- cgit v1.2.3 From 33241134e6e3d5bf19141eceff90fd854b23386a Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 23 Jul 2010 20:55:48 -0400 Subject: r600g: first pass at texture support This add texture support to the assembler, generated code is wrong (tested against working dump). Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_asm.c | 91 +++++++++++++++++++++++++++++++-- src/gallium/drivers/r600/r600_asm.h | 29 +++++++++++ src/gallium/drivers/r600/r600_context.c | 2 +- src/gallium/drivers/r600/r600_helper.c | 11 ++-- src/gallium/drivers/r600/r600_shader.c | 31 ++++++++--- src/gallium/drivers/r600/r600_sq.h | 2 + 6 files changed, 152 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 6e48703a57..e678a2fdf2 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -38,6 +38,7 @@ static struct r600_bc_cf *r600_bc_cf(void) LIST_INITHEAD(&cf->list); LIST_INITHEAD(&cf->alu); LIST_INITHEAD(&cf->vtx); + LIST_INITHEAD(&cf->tex); return cf; } @@ -61,6 +62,16 @@ static struct r600_bc_vtx *r600_bc_vtx(void) return vtx; } +static struct r600_bc_tex *r600_bc_tex(void) +{ + struct r600_bc_tex *tex = CALLOC_STRUCT(r600_bc_tex); + + if (tex == NULL) + return NULL; + LIST_INITHEAD(&tex->list); + return tex; +} + int r600_bc_init(struct r600_bc *bc, enum radeon_family family) { LIST_INITHEAD(&bc->cf); @@ -149,8 +160,14 @@ int r600_bc_add_literal(struct r600_bc *bc, const u32 *value) { struct r600_bc_alu *alu; - if (bc->cf_last == NULL || - bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) || + if (bc->cf_last == NULL) { + R600_ERR("no last CF\n"); + return -EINVAL; + } + if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) { + return 0; + } + if (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) || LIST_IS_EMPTY(&bc->cf_last->alu)) { R600_ERR("last CF is not ALU (%p)\n", bc->cf_last); return -EINVAL; @@ -186,13 +203,39 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_VTX; } LIST_ADDTAIL(&nvtx->list, &bc->cf_last->vtx); - /* each fetch use 6 dwords */ + /* each fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; return 0; } -int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id) +int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) +{ + struct r600_bc_tex *ntex = r600_bc_tex(); + int r; + + if (ntex == NULL) + return -ENOMEM; + memcpy(ntex, tex, sizeof(struct r600_bc_tex)); + + /* cf can contains only alu or only vtx or only tex */ + if (bc->cf_last == NULL || + bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX) { + r = r600_bc_add_cf(bc); + if (r) { + free(ntex); + return r; + } + bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_TEX; + } + LIST_ADDTAIL(&ntex->list, &bc->cf_last->tex); + /* each texture fetch use 4 dwords */ + bc->cf_last->ndw += 4; + bc->ndw += 4; + return 0; +} + +static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id) { bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | @@ -209,6 +252,35 @@ int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id) return 0; } +static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsigned id) +{ + bc->bytecode[id++] = S_SQ_TEX_WORD0_TEX_INST(tex->inst) | + S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) | + S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) | + S_SQ_TEX_WORD0_SRC_REL(tex->src_rel); + bc->bytecode[id++] = S_SQ_TEX_WORD1_DST_GPR(tex->dst_gpr) | + S_SQ_TEX_WORD1_DST_REL(tex->dst_rel) | + S_SQ_TEX_WORD1_DST_SEL_X(tex->dst_sel_x) | + S_SQ_TEX_WORD1_DST_SEL_Y(tex->dst_sel_y) | + S_SQ_TEX_WORD1_DST_SEL_Z(tex->dst_sel_z) | + S_SQ_TEX_WORD1_DST_SEL_W(tex->dst_sel_w) | + S_SQ_TEX_WORD1_LOD_BIAS(tex->lod_bias) | + S_SQ_TEX_WORD1_COORD_TYPE_X(tex->coord_type_x) | + S_SQ_TEX_WORD1_COORD_TYPE_Y(tex->coord_type_y) | + S_SQ_TEX_WORD1_COORD_TYPE_Z(tex->coord_type_z) | + S_SQ_TEX_WORD1_COORD_TYPE_W(tex->coord_type_w); + bc->bytecode[id++] = S_SQ_TEX_WORD2_OFFSET_X(tex->offset_x) | + S_SQ_TEX_WORD2_OFFSET_Y(tex->offset_y) | + S_SQ_TEX_WORD2_OFFSET_Z(tex->offset_z) | + S_SQ_TEX_WORD2_SAMPLER_ID(tex->sampler_id) | + S_SQ_TEX_WORD2_SRC_SEL_X(tex->src_sel_x) | + S_SQ_TEX_WORD2_SRC_SEL_Y(tex->src_sel_y) | + S_SQ_TEX_WORD2_SRC_SEL_Z(tex->src_sel_z) | + S_SQ_TEX_WORD2_SRC_SEL_W(tex->src_sel_w); + bc->bytecode[id++] = 0; + return 0; +} + int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) { unsigned i; @@ -262,6 +334,7 @@ int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) S_SQ_CF_ALU_WORD1_BARRIER(1) | S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); break; + case V_SQ_CF_WORD1_SQ_CF_INST_TEX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); @@ -295,6 +368,7 @@ int r600_bc_build(struct r600_bc *bc) struct r600_bc_cf *cf; struct r600_bc_alu *alu; struct r600_bc_vtx *vtx; + struct r600_bc_tex *tex; unsigned addr; int r; @@ -306,6 +380,7 @@ int r600_bc_build(struct r600_bc *bc) switch (cf->inst) { case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): break; + case V_SQ_CF_WORD1_SQ_CF_INST_TEX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: /* fetch node need to be 16 bytes aligned*/ @@ -373,6 +448,14 @@ int r600_bc_build(struct r600_bc *bc) addr += 4; } break; + case V_SQ_CF_WORD1_SQ_CF_INST_TEX: + LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { + r = r600_bc_tex_build(bc, tex, addr); + if (r) + return r; + addr += 4; + } + break; case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: break; diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 8a874a9df5..88fb957440 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -51,6 +51,33 @@ struct r600_bc_alu { u32 value[4]; }; +struct r600_bc_tex { + struct list_head list; + unsigned inst; + unsigned resource_id; + unsigned src_gpr; + unsigned src_rel; + unsigned dst_gpr; + unsigned dst_rel; + unsigned dst_sel_x; + unsigned dst_sel_y; + unsigned dst_sel_z; + unsigned dst_sel_w; + unsigned lod_bias; + unsigned coord_type_x; + unsigned coord_type_y; + unsigned coord_type_z; + unsigned coord_type_w; + unsigned offset_x; + unsigned offset_y; + unsigned offset_z; + unsigned sampler_id; + unsigned src_sel_x; + unsigned src_sel_y; + unsigned src_sel_z; + unsigned src_sel_w; +}; + struct r600_bc_vtx { struct list_head list; unsigned inst; @@ -87,6 +114,7 @@ struct r600_bc_cf { unsigned ndw; unsigned id; struct list_head alu; + struct list_head tex; struct list_head vtx; struct r600_bc_output output; }; @@ -106,6 +134,7 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family); int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu); int r600_bc_add_literal(struct r600_bc *bc, const u32 *value); int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx); +int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex); int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output); int r600_bc_build(struct r600_bc *bc); diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index 3c5195f79e..05575b5767 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -55,7 +55,7 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, */ if (!dc) radeon_ctx_dump_bof(rctx->ctx, "gallium.bof"); -#if 1 +#if 0 radeon_ctx_submit(rctx->ctx); #endif rctx->ctx = radeon_ctx_decref(rctx->ctx); diff --git a/src/gallium/drivers/r600/r600_helper.c b/src/gallium/drivers/r600/r600_helper.c index e3175b627a..7241ab1c17 100644 --- a/src/gallium/drivers/r600/r600_helper.c +++ b/src/gallium/drivers/r600/r600_helper.c @@ -27,6 +27,7 @@ #include #include #include "r600_screen.h" +#include "r600_context.h" #include "r600d.h" int r600_conv_pipe_format(unsigned pformat, unsigned *format) @@ -49,6 +50,12 @@ int r600_conv_pipe_format(unsigned pformat, unsigned *format) case PIPE_FORMAT_R8G8B8A8_SSCALED: *format = V_0280A0_COLOR_8_8_8_8; return 0; + case PIPE_FORMAT_R32_FLOAT: + *format = V_0280A0_COLOR_32_FLOAT; + return 0; + case PIPE_FORMAT_R32G32_FLOAT: + *format = V_0280A0_COLOR_32_32_FLOAT; + return 0; case PIPE_FORMAT_L8_UNORM: case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: @@ -60,8 +67,6 @@ int r600_conv_pipe_format(unsigned pformat, unsigned *format) case PIPE_FORMAT_R64G64_FLOAT: case PIPE_FORMAT_R64G64B64_FLOAT: case PIPE_FORMAT_R64G64B64A64_FLOAT: - case PIPE_FORMAT_R32_FLOAT: - case PIPE_FORMAT_R32G32_FLOAT: case PIPE_FORMAT_R32_UNORM: case PIPE_FORMAT_R32G32_UNORM: case PIPE_FORMAT_R32G32B32_UNORM: @@ -111,7 +116,7 @@ int r600_conv_pipe_format(unsigned pformat, unsigned *format) case PIPE_FORMAT_R32G32B32_FIXED: case PIPE_FORMAT_R32G32B32A32_FIXED: default: - fprintf(stderr, "%s:%d unsupported %d\n", __func__, __LINE__, pformat); + R600_ERR("unsupported %d\n", pformat); return -EINVAL; } } diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index d788ab88be..e865f013f7 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -23,6 +23,7 @@ #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_scan.h" +#include "tgsi/tgsi_dump.h" #include "util/u_format.h" #include "r600_screen.h" #include "r600_context.h" @@ -259,10 +260,6 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) R600_ERR("label unsupported\n"); return -EINVAL; } - if (i->Instruction.Texture) { - R600_ERR("texture unsupported\n"); - return -EINVAL; - } for (j = 0; j < i->Instruction.NumSrcRegs; j++) { if (i->Src[j].Register.Indirect || i->Src[j].Register.Dimension || @@ -321,6 +318,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) break; case TGSI_FILE_CONSTANT: case TGSI_FILE_TEMPORARY: + case TGSI_FILE_SAMPLER: break; default: R600_ERR("unsupported file %d declaration\n", d->Declaration.File); @@ -381,7 +379,6 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s tgsi_parse_token(&ctx.parse); switch (ctx.parse.FullToken.Token.Type) { case TGSI_TOKEN_TYPE_IMMEDIATE: -// R600_ERR("TGSI_TOKEN_TYPE_IMMEDIATE unsupported\n"); immediate = &ctx.parse.FullToken.FullImmediate; value[0] = immediate->u[0].Uint; value[1] = immediate->u[1].Uint; @@ -713,6 +710,28 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) return tgsi_helper_copy(ctx, inst); } +static int tgsi_tex(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_tex tex; + + memset(&tex, 0, sizeof(struct r600_bc_tex)); + tex.inst = ctx->inst_info->r600_opcode; + tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; + tex.sampler_id = tex.resource_id; + tex.src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; + tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Src[0].Register.Index; + tex.dst_sel_x = 0; + tex.dst_sel_y = 1; + tex.dst_sel_z = 2; + tex.dst_sel_w = 3; + tex.src_sel_x = 0; + tex.src_sel_y = 1; + tex.src_sel_z = 2; + tex.src_sel_w = 3; + return r600_bc_add_tex(ctx->bc, &tex); +} + static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, @@ -771,7 +790,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_TEX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXP, 0, 0x10, tgsi_tex}, {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h index 4770ab0bf7..002660c654 100644 --- a/src/gallium/drivers/r600/r600_sq.h +++ b/src/gallium/drivers/r600/r600_sq.h @@ -546,6 +546,8 @@ #define S_SQ_TEX_WORD1_COORD_TYPE_X(x) (((x) & 0x1) << 28) #define G_SQ_TEX_WORD1_COORD_TYPE_X(x) (((x) >> 28) & 0x1) #define C_SQ_TEX_WORD1_COORD_TYPE_X 0xEFFFFFFF +#define V_SQ_TEX_WORD1_COORD_UNNORMALIZED 0x00000000 +#define V_SQ_TEX_WORD1_COORD_NORMALIZED 0x00000001 #define S_SQ_TEX_WORD1_COORD_TYPE_Y(x) (((x) & 0x1) << 29) #define G_SQ_TEX_WORD1_COORD_TYPE_Y(x) (((x) >> 29) & 0x1) #define C_SQ_TEX_WORD1_COORD_TYPE_Y 0xDFFFFFFF -- cgit v1.2.3 From 5603d2e4c4dd64f8d8ba265dde216bf42a9ac945 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Sat, 24 Jul 2010 21:10:45 -0700 Subject: nvfx: Move declaration before code. --- src/gallium/drivers/nvfx/nvfx_fragprog.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c index 6772d9bd51..ee41f03b9b 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragprog.c +++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c @@ -842,7 +842,6 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) struct nouveau_channel* chan = nvfx->screen->base.channel; struct nvfx_fragment_program *fp = nvfx->fragprog; int update = 0; - int i; if (!fp->translated) { @@ -895,6 +894,7 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) { struct nvfx_fragment_program_bo* fpbo = os_malloc_aligned(sizeof(struct nvfx_fragment_program) + fp->prog_size * fp->progs_per_bo, 16); char *map, *buf; + int i; if(fp->fpbo) { @@ -910,7 +910,7 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) map = fpbo->bo->map; buf = fpbo->insn; - for(int i = 0; i < fp->progs_per_bo; ++i) + for(i = 0; i < fp->progs_per_bo; ++i) { memcpy(buf, fp->insn, fp->insn_len * 4); nvfx_fp_memcpy(map, fp->insn, fp->insn_len * 4); @@ -931,6 +931,7 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) uint32_t* map = pipe_buffer_map(&nvfx->pipe, constbuf, PIPE_TRANSFER_READ, &transfer); uint32_t* fpmap = (uint32_t*)((char*)fp->fpbo->bo->map + offset); uint32_t* buf = (uint32_t*)((char*)fp->fpbo->insn + offset); + int i; for (i = 0; i < fp->nr_consts; ++i) { unsigned off = fp->consts[i].offset; unsigned idx = fp->consts[i].index * 4; -- cgit v1.2.3 From 3adb5c7d45f4a38b0fbb66633c808f69082918cc Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 24 Jul 2010 03:07:45 +0200 Subject: r300g: do not align texture height to 2^n for 1D and 2D non-mipmapped textures I don't remember why the alignment was there, but it seems to be no longer needed. I guess it was a dirty fix for some other bug. --- src/gallium/drivers/r300/r300_texture.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 5750bc4329..f7c167d1bf 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -774,7 +774,11 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture* tex, height = align(height, tile_height); /* This is needed for the kernel checker, unfortunately. */ - height = util_next_power_of_two(height); + if ((tex->b.b.target != PIPE_TEXTURE_1D && + tex->b.b.target != PIPE_TEXTURE_2D) || + tex->b.b.last_level != 0) { + height = util_next_power_of_two(height); + } } return util_format_get_nblocksy(tex->b.b.format, height); -- cgit v1.2.3 From 4ce26210842176c4b280b7db85639ced40d4083d Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 24 Jul 2010 03:17:32 +0200 Subject: r300g: cleanup texture debug logging --- src/gallium/drivers/r300/r300_debug.c | 1 + src/gallium/drivers/r300/r300_screen.h | 1 + src/gallium/drivers/r300/r300_texture.c | 41 ++++++++++++++++++--------------- 3 files changed, 25 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 31d4e14681..053a64ea6d 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -38,6 +38,7 @@ static const struct debug_named_value debug_options[] = { { "fall", DBG_FALL, "Fallbacks (for debugging)" }, { "rs", DBG_RS, "Rasterizer (for debugging)" }, { "fb", DBG_FB, "Framebuffer (for debugging)" }, + { "cbzb", DBG_CBZB, "Fast color clear info (for debugging)" }, { "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries (for debugging)" }, { "anisohq", DBG_ANISOHQ, "High quality anisotropic filtering (for benchmarking)" }, { "notiling", DBG_NO_TILING, "Disable tiling (for benchmarking)" }, diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index edc494ff6c..18745b83a0 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -90,6 +90,7 @@ r300_winsys_screen(struct pipe_screen *screen) { #define DBG_FALL (1 << 8) #define DBG_FB (1 << 9) #define DBG_RS_BLOCK (1 << 10) +#define DBG_CBZB (1 << 11) /* Features. */ #define DBG_ANISOHQ (1 << 16) #define DBG_NO_TILING (1 << 17) diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index f7c167d1bf..176fa76920 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -958,6 +958,21 @@ struct u_resource_vtbl r300_texture_vtbl = u_default_transfer_inline_write /* transfer_inline_write */ }; +static void r300_tex_print_info(struct r300_screen *rscreen, struct r300_texture *tex, + const char *func) +{ + fprintf(stderr, + "r300: %s: Macro: %s, Micro: %s, Pitch: %i, Dim: %ix%ix%i, " + "LastLevel: %i, Size: %i, Format: %s\n", + func, + tex->macrotile ? "YES" : " NO", + tex->microtile ? "YES" : " NO", + tex->hwpitch[0], + tex->b.b.width0, tex->b.b.height0, tex->b.b.depth0, + tex->b.b.last_level, tex->size, + util_format_short_name(tex->b.b.format)); +} + /* Create a new texture. */ struct pipe_resource* r300_texture_create(struct pipe_screen* screen, const struct pipe_resource* base) @@ -997,15 +1012,9 @@ struct pipe_resource* r300_texture_create(struct pipe_screen* screen, r300_texture_setup_immutable_state(rscreen, tex); r300_texture_setup_fb_state(rscreen, tex); - SCREEN_DBG(rscreen, DBG_TEX, - "r300: texture_create: Macro: %s, Micro: %s, Pitch: %i, " - "Dim: %ix%ix%i, LastLevel: %i, Size: %i, Format: %s\n", - tex->macrotile ? "YES" : " NO", - tex->microtile ? "YES" : " NO", - tex->hwpitch[0], - base->width0, base->height0, base->depth0, base->last_level, - tex->size, - util_format_short_name(base->format)); + if (SCREEN_DBG_ON(rscreen, DBG_TEX)) { + r300_tex_print_info(rscreen, tex, "texture_create"); + } tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? R300_DOMAIN_GTT : @@ -1084,7 +1093,7 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, else surface->cbzb_format = R300_DEPTHFORMAT_16BIT_INT_Z; - SCREEN_DBG(r300_screen(screen), DBG_TEX, + SCREEN_DBG(r300_screen(screen), DBG_CBZB, "CBZB Dim: %ix%i, Misalignment: %i, Macro: %s\n", surface->cbzb_width, surface->cbzb_height, offset & 2047, @@ -1144,14 +1153,6 @@ r300_texture_from_handle(struct pipe_screen* screen, rws->buffer_get_tiling(rws, buffer, &tex->microtile, &tex->macrotile); r300_setup_flags(tex); - SCREEN_DBG(rscreen, DBG_TEX, - "r300: texture_from_handle: Macro: %s, Micro: %s, " - "Pitch: % 4i, Dim: %ix%i, Format: %s\n", - tex->macrotile ? "YES" : " NO", - tex->microtile ? "YES" : " NO", - stride / util_format_get_blocksize(base->format), - base->width0, base->height0, - util_format_short_name(base->format)); /* Enforce microtiled zbuffer. */ override_zb_flags = util_format_is_depth_or_stencil(base->format) && @@ -1184,5 +1185,9 @@ r300_texture_from_handle(struct pipe_screen* screen, tex->microtile, tex->macrotile, tex->pitch[0] * util_format_get_blocksize(tex->b.b.format)); } + + if (SCREEN_DBG_ON(rscreen, DBG_TEX)) + r300_tex_print_info(rscreen, tex, "texture_from_handle"); + return (struct pipe_resource*)tex; } -- cgit v1.2.3 From 065e3f7ff2a9b6170e51b0104036088e8d163ea0 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 24 Jul 2010 03:34:18 +0200 Subject: r300g: reject resources from handles which are not large enough The driver gets a buffer and its size in resource_from_handle. It computes the required minimum buffer size from given texture properties, and compares the two sizes. This is to early detect DDX bugs. --- src/gallium/drivers/r300/r300_context.h | 7 ++++++- src/gallium/drivers/r300/r300_texture.c | 18 +++++++++++++++--- src/gallium/drivers/r300/r300_winsys.h | 7 ++++--- src/gallium/winsys/radeon/drm/radeon_drm_buffer.c | 2 +- src/gallium/winsys/radeon/drm/radeon_r300.c | 11 ++++++++--- 5 files changed, 34 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index b9c96d5bdd..7b58587a2a 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -352,9 +352,14 @@ struct r300_texture { */ unsigned stride_override; - /* Total size of this texture, in bytes. */ + /* Total size of this texture, in bytes, + * derived from the texture properties. */ unsigned size; + /* Total size of the buffer backing this texture, in bytes. + * It must be >= size. */ + unsigned buffer_size; + /* Whether this texture has non-power-of-two dimensions * or a user-specified pitch. * It can be either a regular texture or a rectangle one. diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 176fa76920..711042722c 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -1022,6 +1022,7 @@ struct pipe_resource* r300_texture_create(struct pipe_screen* screen, tex->buffer = rws->buffer_create(rws, tex->size, 2048, base->bind, base->usage, tex->domain); + tex->buffer_size = tex->size; if (!tex->buffer) { FREE(tex); @@ -1120,7 +1121,7 @@ r300_texture_from_handle(struct pipe_screen* screen, struct r300_screen* rscreen = r300_screen(screen); struct r300_winsys_buffer *buffer; struct r300_texture* tex; - unsigned stride; + unsigned stride, size; boolean override_zb_flags; /* Support only 2D textures without mipmaps */ @@ -1130,7 +1131,7 @@ r300_texture_from_handle(struct pipe_screen* screen, return NULL; } - buffer = rws->buffer_from_handle(rws, whandle, &stride); + buffer = rws->buffer_from_handle(rws, whandle, &stride, &size); if (!buffer) { return NULL; } @@ -1150,6 +1151,7 @@ r300_texture_from_handle(struct pipe_screen* screen, /* one ref already taken */ tex->buffer = buffer; + tex->buffer_size = size; rws->buffer_get_tiling(rws, buffer, &tex->microtile, &tex->macrotile); r300_setup_flags(tex); @@ -1186,8 +1188,18 @@ r300_texture_from_handle(struct pipe_screen* screen, tex->pitch[0] * util_format_get_blocksize(tex->b.b.format)); } - if (SCREEN_DBG_ON(rscreen, DBG_TEX)) + /* Make sure the buffer we got is large enough. */ + if (tex->size > tex->buffer_size) { + fprintf(stderr, "r300: texture_from_handle: The buffer is not " + "large enough. Got: %i, Need: %i, Info:\n", + tex->buffer_size, tex->size); r300_tex_print_info(rscreen, tex, "texture_from_handle"); + pipe_resource_reference((struct pipe_resource**)&tex, NULL); + return NULL; + } else { + if (SCREEN_DBG_ON(rscreen, DBG_TEX)) + r300_tex_print_info(rscreen, tex, "texture_from_handle"); + } return (struct pipe_resource*)tex; } diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 7e115c2d62..ff11546a64 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -184,12 +184,13 @@ struct r300_winsys_screen { * \param ws The winsys this function is called from. * \param whandle A winsys handle pointer as was received from a state * tracker. - * \param stride A pointer to the stride return variable. - * The stride is in bytes. + * \param stride The returned buffer stride in bytes. + * \param size The returned buffer size. */ struct r300_winsys_buffer *(*buffer_from_handle)(struct r300_winsys_screen *ws, struct winsys_handle *whandle, - unsigned *stride); + unsigned *stride, + unsigned *size); /** * Get a winsys handle from a winsys buffer. The internal structure diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c index 5ea5912089..017eac8464 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c @@ -189,7 +189,7 @@ struct pb_buffer *radeon_drm_bufmgr_create_buffer_from_handle(struct pb_manager pipe_reference_init(&buf->base.base.reference, 1); buf->base.base.alignment = 0; buf->base.base.usage = PB_USAGE_GPU_WRITE | PB_USAGE_GPU_READ; - buf->base.base.size = 0; + buf->base.base.size = bo->size; buf->base.vtbl = &radeon_drm_buffer_vtbl; buf->mgr = mgr; diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c index effa27f5c7..5544504067 100644 --- a/src/gallium/winsys/radeon/drm/radeon_r300.c +++ b/src/gallium/winsys/radeon/drm/radeon_r300.c @@ -109,14 +109,19 @@ static void radeon_r300_winsys_buffer_reference(struct r300_winsys_screen *rws, static struct r300_winsys_buffer *radeon_r300_winsys_buffer_from_handle(struct r300_winsys_screen *rws, struct winsys_handle *whandle, - unsigned *stride) + unsigned *stride, + unsigned *size) { struct radeon_libdrm_winsys *ws = radeon_libdrm_winsys(rws); struct pb_buffer *_buf; - *stride = whandle->stride; - _buf = radeon_drm_bufmgr_create_buffer_from_handle(ws->kman, whandle->handle); + + if (stride) + *stride = whandle->stride; + if (size) + *size = _buf->base.size; + return radeon_libdrm_winsys_buffer(_buf); } -- cgit v1.2.3 From d779a5d16ae6a17b3fc0c097f4eb477a80e54566 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 24 Jul 2010 21:32:53 +0200 Subject: r300g: cleanup texture creation code This decouples initializing a texture layout/miptree description from an actual texture creation, it also partially unifies texture_create and texture_from_handle. r300_texture inherits r300_texture_desc, which inherits u_resource. The CBZB clear criteria are moved to r300_texture_desc::cbzb_allowed[level]. And other minor cleanups. --- src/gallium/drivers/r300/Makefile | 1 + src/gallium/drivers/r300/SConscript | 1 + src/gallium/drivers/r300/r300_blit.c | 19 +- src/gallium/drivers/r300/r300_context.h | 64 +-- src/gallium/drivers/r300/r300_defines.h | 5 +- src/gallium/drivers/r300/r300_fs.c | 2 +- src/gallium/drivers/r300/r300_state.c | 15 +- src/gallium/drivers/r300/r300_state_derived.c | 8 +- src/gallium/drivers/r300/r300_texture.c | 593 ++++++-------------------- src/gallium/drivers/r300/r300_texture.h | 6 - src/gallium/drivers/r300/r300_texture_desc.c | 456 ++++++++++++++++++++ src/gallium/drivers/r300/r300_texture_desc.h | 57 +++ src/gallium/drivers/r300/r300_transfer.c | 49 ++- 13 files changed, 733 insertions(+), 543 deletions(-) create mode 100644 src/gallium/drivers/r300/r300_texture_desc.c create mode 100644 src/gallium/drivers/r300/r300_texture_desc.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index 13152635a6..728bc40a5b 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -24,6 +24,7 @@ C_SOURCES = \ r300_vs.c \ r300_vs_draw.c \ r300_texture.c \ + r300_texture_desc.c \ r300_tgsi_to_rc.c \ r300_transfer.c diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index 552ed4e5be..bf023daaa5 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -34,6 +34,7 @@ r300 = env.ConvenienceLibrary( 'r300_vs.c', 'r300_vs_draw.c', 'r300_texture.c', + 'r300_texture_desc.c', 'r300_tgsi_to_rc.c', 'r300_transfer.c', ] + r300compiler) + r300compiler diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 895efaa1c4..d125196b6d 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -97,29 +97,12 @@ static boolean r300_cbzb_clear_allowed(struct r300_context *r300, { struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - struct r300_surface *surf = r300_surface(fb->cbufs[0]); - unsigned bpp; /* Only color clear allowed, and only one colorbuffer. */ if (clear_buffers != PIPE_CLEAR_COLOR || fb->nr_cbufs != 1) return FALSE; - /* The colorbuffer must be point-sampled. */ - if (surf->base.texture->nr_samples > 1) - return FALSE; - - bpp = util_format_get_blocksizebits(surf->base.format); - - /* ZB can only work with the two pixel sizes. */ - if (bpp != 16 && bpp != 32) - return FALSE; - - /* If the midpoint ZB offset is not aligned to 2048, it returns garbage - * with certain texture sizes. Macrotiling ensures the alignment. */ - if (!r300_texture(surf->base.texture)->mip_macrotile[surf->base.level]) - return FALSE; - - return TRUE; + return r300_surface(fb->cbufs[0])->cbzb_allowed; } /* Clear currently bound buffers. */ diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 7b58587a2a..06e4e12558 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -318,29 +318,38 @@ struct r300_surface { uint32_t cbzb_midpoint_offset; /* DEPTHOFFSET. */ uint32_t cbzb_pitch; /* DEPTHPITCH. */ uint32_t cbzb_format; /* ZB_FORMAT. */ + + /* Whether the CBZB clear is allowed on the surface. */ + boolean cbzb_allowed; }; -struct r300_texture { - /* Parent class */ +struct r300_texture_desc { + /* Parent class. */ struct u_resource b; - enum r300_buffer_domain domain; + /* Buffer tiling. + * Macrotiling is specified per-level because small mipmaps cannot + * be macrotiled. */ + enum r300_buffer_tiling microtile; + enum r300_buffer_tiling macrotile[R300_MAX_TEXTURE_LEVELS]; /* Offsets into the buffer. */ - unsigned offset[R300_MAX_TEXTURE_LEVELS]; + unsigned offset_in_bytes[R300_MAX_TEXTURE_LEVELS]; - /* A pitch for each mip-level */ - unsigned pitch[R300_MAX_TEXTURE_LEVELS]; + /* Strides for each mip-level. */ + unsigned stride_in_pixels[R300_MAX_TEXTURE_LEVELS]; + unsigned stride_in_bytes[R300_MAX_TEXTURE_LEVELS]; - /* A pitch multiplied by blockwidth as hardware wants - * the number of pixels instead of the number of blocks. */ - unsigned hwpitch[R300_MAX_TEXTURE_LEVELS]; + /* Size of one zslice or face or 2D image based on the texture target. */ + unsigned layer_size_in_bytes[R300_MAX_TEXTURE_LEVELS]; - /* Size of one zslice or face based on the texture target */ - unsigned layer_size[R300_MAX_TEXTURE_LEVELS]; + /* Total size of this texture, in bytes, + * derived from the texture properties. */ + unsigned size_in_bytes; - /* Whether the mipmap level is macrotiled. */ - enum r300_buffer_tiling mip_macrotile[R300_MAX_TEXTURE_LEVELS]; + /* Total size of the buffer backing this texture, in bytes. + * It must be >= size. */ + unsigned buffer_size_in_bytes; /** * If non-zero, override the natural texture layout with @@ -350,21 +359,25 @@ struct r300_texture { * * \sa r300_texture_get_stride */ - unsigned stride_override; - - /* Total size of this texture, in bytes, - * derived from the texture properties. */ - unsigned size; - - /* Total size of the buffer backing this texture, in bytes. - * It must be >= size. */ - unsigned buffer_size; + unsigned stride_in_bytes_override; /* Whether this texture has non-power-of-two dimensions - * or a user-specified pitch. + * or a user-specified stride. * It can be either a regular texture or a rectangle one. + * + * This flag says that hardware must use the stride for addressing + * instead of the width. */ - boolean uses_pitch; + boolean uses_stride_addressing; + + /* Whether CBZB fast color clear is allowed on the miplevel. */ + boolean cbzb_allowed[R300_MAX_TEXTURE_LEVELS]; +}; + +struct r300_texture { + struct r300_texture_desc desc; + + enum r300_buffer_domain domain; /* Pipe buffer backing this texture. */ struct r300_winsys_buffer *buffer; @@ -375,9 +388,6 @@ struct r300_texture { /* All bits should be filled in. */ struct r300_texture_fb_state fb_state; - /* Buffer tiling */ - enum r300_buffer_tiling microtile, macrotile; - /* This is the level tiling flags were last time set for. * It's used to prevent redundant tiling-flags changes from happening.*/ unsigned surface_level; diff --git a/src/gallium/drivers/r300/r300_defines.h b/src/gallium/drivers/r300/r300_defines.h index d510d80a7b..896aeef395 100644 --- a/src/gallium/drivers/r300/r300_defines.h +++ b/src/gallium/drivers/r300/r300_defines.h @@ -36,7 +36,10 @@ enum r300_buffer_tiling { R300_BUFFER_LINEAR = 0, R300_BUFFER_TILED, - R300_BUFFER_SQUARETILED + R300_BUFFER_SQUARETILED, + + R300_BUFFER_UNKNOWN, + R300_BUFFER_SELECT_LAYOUT = R300_BUFFER_UNKNOWN }; enum r300_buffer_domain { /* bitfield */ diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index b145ded639..6eac12bfb9 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -173,7 +173,7 @@ static void get_external_state( t = (struct r300_texture*)texstate->sampler_views[i]->base.texture; /* XXX this should probably take into account STR, not just S. */ - if (t->uses_pitch) { + if (t->desc.uses_stride_addressing) { switch (s->state.wrap_s) { case PIPE_TEX_WRAP_REPEAT: state->unit[i].wrap_mode = RC_WRAP_REPEAT; diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index f52265b1c0..6e2a6ca0e4 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -619,7 +619,8 @@ static void r300_tex_set_tiling_flags(struct r300_context *r300, { /* Check if the macrotile flag needs to be changed. * Skip changing the flags otherwise. */ - if (tex->mip_macrotile[tex->surface_level] != tex->mip_macrotile[level]) { + if (tex->desc.macrotile[tex->surface_level] != + tex->desc.macrotile[level]) { /* Tiling determines how DRM treats the buffer data. * We must flush CS when changing it if the buffer is referenced. */ if (r300->rws->cs_is_buffer_referenced(r300->cs, @@ -627,8 +628,8 @@ static void r300_tex_set_tiling_flags(struct r300_context *r300, r300->context.flush(&r300->context, 0, NULL); r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->microtile, tex->mip_macrotile[level], - tex->pitch[0] * util_format_get_blocksize(tex->b.b.format)); + tex->desc.microtile, tex->desc.macrotile[level], + tex->desc.stride_in_bytes[0]); tex->surface_level = level; } @@ -670,8 +671,10 @@ static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index, surf->zslice, surf->face, surf->level, util_format_short_name(surf->format), - rtex->macrotile ? "YES" : " NO", rtex->microtile ? "YES" : " NO", - rtex->hwpitch[0], tex->width0, tex->height0, tex->depth0, + rtex->desc.macrotile[0] ? "YES" : " NO", + rtex->desc.microtile ? "YES" : " NO", + rtex->desc.stride_in_pixels[0], + tex->width0, tex->height0, tex->depth0, tex->last_level, util_format_short_name(tex->format)); } @@ -1293,7 +1296,7 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, /* Set the texrect factor in the fragment shader. * Needed for RECT and NPOT fallback. */ texture = r300_texture(views[i]->texture); - if (texture->uses_pitch) { + if (texture->desc.uses_stride_addressing) { r300->fs_rc_constant_state.dirty = TRUE; } diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 2ef9766578..e20d8d0fdf 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -567,7 +567,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) * are stored in the format. * Otherwise, swizzles must be applied after the compare mode * in the fragment shader. */ - if (util_format_is_depth_or_stencil(tex->b.b.format)) { + if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) { if (sampler->state.compare_mode == PIPE_TEX_COMPARE_NONE) { texstate->format.format1 |= r300_get_swizzle_combined(depth_swizzle, view->swizzle); @@ -578,12 +578,12 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) } /* to emulate 1D textures through 2D ones correctly */ - if (tex->b.b.target == PIPE_TEXTURE_1D) { + if (tex->desc.b.b.target == PIPE_TEXTURE_1D) { texstate->filter0 &= ~R300_TX_WRAP_T_MASK; texstate->filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); } - if (tex->uses_pitch) { + if (tex->desc.uses_stride_addressing) { /* NPOT textures don't support mip filter, unfortunately. * This prevents incorrect rendering. */ texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK; @@ -610,7 +610,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) /* determine min/max levels */ /* the MAX_MIP level is the largest (finest) one */ max_level = MIN3(sampler->max_lod + view->base.first_level, - tex->b.b.last_level, view->base.last_level); + tex->desc.b.b.last_level, view->base.last_level); min_level = MIN2(sampler->min_lod + view->base.first_level, max_level); texstate->format.format0 |= R300_TX_NUM_LEVELS(max_level); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 711042722c..e99a4630ee 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -26,6 +26,7 @@ #include "r300_context.h" #include "r300_reg.h" +#include "r300_texture_desc.h" #include "r300_transfer.h" #include "r300_screen.h" #include "r300_winsys.h" @@ -37,11 +38,6 @@ #include "pipe/p_screen.h" -enum r300_dim { - DIM_WIDTH = 0, - DIM_HEIGHT = 1 -}; - unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, const unsigned char *swizzle_view) { @@ -544,17 +540,17 @@ static void r300_texture_setup_immutable_state(struct r300_screen* screen, struct r300_texture* tex) { struct r300_texture_format_state* f = &tex->tx_format; - struct pipe_resource *pt = &tex->b.b; + struct pipe_resource *pt = &tex->desc.b.b; boolean is_r500 = screen->caps.is_r500; /* Set sampler state. */ f->format0 = R300_TX_WIDTH((pt->width0 - 1) & 0x7ff) | R300_TX_HEIGHT((pt->height0 - 1) & 0x7ff); - if (tex->uses_pitch) { + if (tex->desc.uses_stride_addressing) { /* rectangles love this */ f->format0 |= R300_TX_PITCH_EN; - f->format2 = (tex->hwpitch[0] - 1) & 0x1fff; + f->format2 = (tex->desc.stride_in_pixels[0] - 1) & 0x1fff; } else { /* power of two textures (3D, mipmaps, and no pitch) */ f->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth0) & 0xf); @@ -579,8 +575,8 @@ static void r300_texture_setup_immutable_state(struct r300_screen* screen, } } - f->tile_config = R300_TXO_MACRO_TILE(tex->macrotile) | - R300_TXO_MICRO_TILE(tex->microtile); + f->tile_config = R300_TXO_MACRO_TILE(tex->desc.macrotile[0]) | + R300_TXO_MICRO_TILE(tex->desc.microtile); } static void r300_texture_setup_fb_state(struct r300_screen* screen, @@ -589,23 +585,23 @@ static void r300_texture_setup_fb_state(struct r300_screen* screen, unsigned i; /* Set framebuffer state. */ - if (util_format_is_depth_or_stencil(tex->b.b.format)) { - for (i = 0; i <= tex->b.b.last_level; i++) { + if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) { + for (i = 0; i <= tex->desc.b.b.last_level; i++) { tex->fb_state.pitch[i] = - tex->hwpitch[i] | - R300_DEPTHMACROTILE(tex->mip_macrotile[i]) | - R300_DEPTHMICROTILE(tex->microtile); + tex->desc.stride_in_pixels[i] | + R300_DEPTHMACROTILE(tex->desc.macrotile[i]) | + R300_DEPTHMICROTILE(tex->desc.microtile); } - tex->fb_state.format = r300_translate_zsformat(tex->b.b.format); + tex->fb_state.format = r300_translate_zsformat(tex->desc.b.b.format); } else { - for (i = 0; i <= tex->b.b.last_level; i++) { + for (i = 0; i <= tex->desc.b.b.last_level; i++) { tex->fb_state.pitch[i] = - tex->hwpitch[i] | - r300_translate_colorformat(tex->b.b.format) | - R300_COLOR_TILE(tex->mip_macrotile[i]) | - R300_COLOR_MICROTILE(tex->microtile); + tex->desc.stride_in_pixels[i] | + r300_translate_colorformat(tex->desc.b.b.format) | + R300_COLOR_TILE(tex->desc.macrotile[i]) | + R300_COLOR_MICROTILE(tex->desc.microtile); } - tex->fb_state.format = r300_translate_out_fmt(tex->b.b.format); + tex->fb_state.format = r300_translate_out_fmt(tex->desc.b.b.format); } } @@ -625,286 +621,6 @@ void r300_texture_reinterpret_format(struct pipe_screen *screen, r300_texture_setup_fb_state(r300_screen(screen), r300_texture(tex)); } -unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, - unsigned zslice, unsigned face) -{ - unsigned offset = tex->offset[level]; - - switch (tex->b.b.target) { - case PIPE_TEXTURE_3D: - assert(face == 0); - return offset + zslice * tex->layer_size[level]; - - case PIPE_TEXTURE_CUBE: - assert(zslice == 0); - return offset + face * tex->layer_size[level]; - - default: - assert(zslice == 0 && face == 0); - return offset; - } -} - -/* Returns the number of pixels that the texture should be aligned to - * in the given dimension. */ -static unsigned r300_get_pixel_alignment(struct r300_texture *tex, - enum r300_buffer_tiling macrotile, - enum r300_dim dim) -{ - static const unsigned table[2][5][3][2] = - { - { - /* Macro: linear linear linear - Micro: linear tiled square-tiled */ - {{ 32, 1}, { 8, 4}, { 0, 0}}, /* 8 bits per pixel */ - {{ 16, 1}, { 8, 2}, { 4, 4}}, /* 16 bits per pixel */ - {{ 8, 1}, { 4, 2}, { 0, 0}}, /* 32 bits per pixel */ - {{ 4, 1}, { 0, 0}, { 2, 2}}, /* 64 bits per pixel */ - {{ 2, 1}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ - }, - { - /* Macro: tiled tiled tiled - Micro: linear tiled square-tiled */ - {{256, 8}, {64, 32}, { 0, 0}}, /* 8 bits per pixel */ - {{128, 8}, {64, 16}, {32, 32}}, /* 16 bits per pixel */ - {{ 64, 8}, {32, 16}, { 0, 0}}, /* 32 bits per pixel */ - {{ 32, 8}, { 0, 0}, {16, 16}}, /* 64 bits per pixel */ - {{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ - } - }; - static const unsigned aa_block[2] = {4, 8}; - unsigned res = 0; - unsigned pixsize = util_format_get_blocksize(tex->b.b.format); - - assert(macrotile <= R300_BUFFER_TILED); - assert(tex->microtile <= R300_BUFFER_SQUARETILED); - assert(pixsize <= 16); - assert(dim <= DIM_HEIGHT); - - if (tex->b.b.nr_samples > 1) { - /* Multisampled textures have their own alignment scheme. */ - if (pixsize == 4) - res = aa_block[dim]; - } else { - /* Standard alignment. */ - res = table[macrotile][util_logbase2(pixsize)][tex->microtile][dim]; - } - - assert(res); - return res; -} - -/* Return true if macrotiling should be enabled on the miplevel. */ -static boolean r300_texture_macro_switch(struct r300_texture *tex, - unsigned level, - boolean rv350_mode, - enum r300_dim dim) -{ - unsigned tile, texdim; - - tile = r300_get_pixel_alignment(tex, R300_BUFFER_TILED, dim); - if (dim == DIM_WIDTH) { - texdim = u_minify(tex->b.b.width0, level); - } else { - texdim = u_minify(tex->b.b.height0, level); - } - - /* See TX_FILTER1_n.MACRO_SWITCH. */ - if (rv350_mode) { - return texdim >= tile; - } else { - return texdim > tile; - } -} - -/** - * Return the stride, in bytes, of the texture images of the given texture - * at the given level. - */ -unsigned r300_texture_get_stride(struct r300_screen* screen, - struct r300_texture* tex, unsigned level) -{ - unsigned tile_width, width, stride; - - if (tex->stride_override) - return tex->stride_override; - - /* Check the level. */ - if (level > tex->b.b.last_level) { - SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n", - __FUNCTION__, level, tex->b.b.last_level); - return 0; - } - - width = u_minify(tex->b.b.width0, level); - - if (util_format_is_plain(tex->b.b.format)) { - tile_width = r300_get_pixel_alignment(tex, tex->mip_macrotile[level], - DIM_WIDTH); - width = align(width, tile_width); - - stride = util_format_get_stride(tex->b.b.format, width); - - /* Some IGPs need a minimum stride of 64 bytes, hmm... - * This doesn't seem to apply to tiled textures, according to r300c. */ - if (!tex->microtile && !tex->mip_macrotile[level] && - (screen->caps.family == CHIP_FAMILY_RS600 || - screen->caps.family == CHIP_FAMILY_RS690 || - screen->caps.family == CHIP_FAMILY_RS740)) { - return stride < 64 ? 64 : stride; - } - - /* The alignment to 32 bytes is sort of implied by the layout... */ - return stride; - } else { - return align(util_format_get_stride(tex->b.b.format, width), 32); - } -} - -static unsigned r300_texture_get_nblocksy(struct r300_texture* tex, - unsigned level) -{ - unsigned height, tile_height; - - height = u_minify(tex->b.b.height0, level); - - if (util_format_is_plain(tex->b.b.format)) { - tile_height = r300_get_pixel_alignment(tex, tex->mip_macrotile[level], - DIM_HEIGHT); - height = align(height, tile_height); - - /* This is needed for the kernel checker, unfortunately. */ - if ((tex->b.b.target != PIPE_TEXTURE_1D && - tex->b.b.target != PIPE_TEXTURE_2D) || - tex->b.b.last_level != 0) { - height = util_next_power_of_two(height); - } - } - - return util_format_get_nblocksy(tex->b.b.format, height); -} - -static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, - struct r300_texture *tex) -{ - /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures - * incorrectly. This is a workaround to prevent CS from being rejected. */ - - unsigned i, size; - - if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) && - tex->b.b.target == PIPE_TEXTURE_3D && - tex->b.b.last_level > 0) { - size = 0; - - for (i = 0; i <= tex->b.b.last_level; i++) { - size += r300_texture_get_stride(screen, tex, i) * - r300_texture_get_nblocksy(tex, i); - } - - size *= tex->b.b.depth0; - tex->size = size; - } -} - -static void r300_setup_miptree(struct r300_screen* screen, - struct r300_texture* tex) -{ - struct pipe_resource* base = &tex->b.b; - unsigned stride, size, layer_size, nblocksy, i; - boolean rv350_mode = screen->caps.is_rv350; - - SCREEN_DBG(screen, DBG_TEXALLOC, - "r300: Making miptree for texture, format %s\n", - util_format_short_name(base->format)); - - for (i = 0; i <= base->last_level; i++) { - /* Let's see if this miplevel can be macrotiled. */ - tex->mip_macrotile[i] = - (tex->macrotile == R300_BUFFER_TILED && - r300_texture_macro_switch(tex, i, rv350_mode, DIM_WIDTH) && - r300_texture_macro_switch(tex, i, rv350_mode, DIM_HEIGHT)) ? - R300_BUFFER_TILED : R300_BUFFER_LINEAR; - - stride = r300_texture_get_stride(screen, tex, i); - nblocksy = r300_texture_get_nblocksy(tex, i); - layer_size = stride * nblocksy; - - if (base->nr_samples) { - layer_size *= base->nr_samples; - } - - if (base->target == PIPE_TEXTURE_CUBE) - size = layer_size * 6; - else - size = layer_size * u_minify(base->depth0, i); - - tex->offset[i] = tex->size; - tex->size = tex->offset[i] + size; - tex->layer_size[i] = layer_size; - tex->pitch[i] = stride / util_format_get_blocksize(base->format); - tex->hwpitch[i] = - tex->pitch[i] * util_format_get_blockwidth(base->format); - - SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d " - "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", - i, u_minify(base->width0, i), u_minify(base->height0, i), - u_minify(base->depth0, i), stride, tex->size, - tex->mip_macrotile[i] ? "TRUE" : "FALSE"); - } -} - -static void r300_setup_flags(struct r300_texture* tex) -{ - tex->uses_pitch = !util_is_power_of_two(tex->b.b.width0) || - !util_is_power_of_two(tex->b.b.height0) || - tex->stride_override; -} - -static void r300_setup_tiling(struct pipe_screen *screen, - struct r300_texture *tex) -{ - struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; - enum pipe_format format = tex->b.b.format; - boolean rv350_mode = r300_screen(screen)->caps.is_rv350; - boolean is_zb = util_format_is_depth_or_stencil(format); - boolean dbg_no_tiling = SCREEN_DBG_ON(r300_screen(screen), DBG_NO_TILING); - - if (!util_format_is_plain(format)) { - return; - } - - /* If height == 1, disable microtiling except for zbuffer. */ - if (!is_zb && (tex->b.b.height0 == 1 || dbg_no_tiling)) { - return; - } - - /* Set microtiling. */ - switch (util_format_get_blocksize(format)) { - case 1: - case 4: - tex->microtile = R300_BUFFER_TILED; - break; - - case 2: - case 8: - if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) { - tex->microtile = R300_BUFFER_SQUARETILED; - } - break; - } - - if (dbg_no_tiling) { - return; - } - - /* Set macrotiling. */ - if (r300_texture_macro_switch(tex, 0, rv350_mode, DIM_WIDTH) && - r300_texture_macro_switch(tex, 0, rv350_mode, DIM_HEIGHT)) { - tex->macrotile = R300_BUFFER_TILED; - } -} - static unsigned r300_texture_is_referenced(struct pipe_context *context, struct pipe_resource *texture, unsigned face, unsigned level) @@ -941,11 +657,10 @@ static boolean r300_texture_get_handle(struct pipe_screen* screen, } return rws->buffer_get_handle(rws, tex->buffer, - r300_texture_get_stride(r300_screen(screen), tex, 0), - whandle); + tex->desc.stride_in_bytes[0], whandle); } -struct u_resource_vtbl r300_texture_vtbl = +struct u_resource_vtbl r300_texture_vtbl = { r300_texture_get_handle, /* get_handle */ r300_texture_destroy, /* resource_destroy */ @@ -958,32 +673,69 @@ struct u_resource_vtbl r300_texture_vtbl = u_default_transfer_inline_write /* transfer_inline_write */ }; -static void r300_tex_print_info(struct r300_screen *rscreen, struct r300_texture *tex, - const char *func) +/* The common texture constructor. */ +static struct r300_texture* +r300_texture_create_object(struct r300_screen *rscreen, + const struct pipe_resource *base, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + unsigned stride_in_bytes_override, + unsigned max_buffer_size, + struct r300_winsys_buffer *buffer) { - fprintf(stderr, - "r300: %s: Macro: %s, Micro: %s, Pitch: %i, Dim: %ix%ix%i, " - "LastLevel: %i, Size: %i, Format: %s\n", - func, - tex->macrotile ? "YES" : " NO", - tex->microtile ? "YES" : " NO", - tex->hwpitch[0], - tex->b.b.width0, tex->b.b.height0, tex->b.b.depth0, - tex->b.b.last_level, tex->size, - util_format_short_name(tex->b.b.format)); + struct r300_winsys_screen *rws = rscreen->rws; + struct r300_texture *tex = CALLOC_STRUCT(r300_texture); + if (!tex) { + if (buffer) + rws->buffer_reference(rws, &buffer, NULL); + return NULL; + } + + /* Initialize the descriptor. */ + if (!r300_texture_desc_init(rscreen, &tex->desc, base, + microtile, macrotile, + stride_in_bytes_override, + max_buffer_size)) { + if (buffer) + rws->buffer_reference(rws, &buffer, NULL); + FREE(tex); + return NULL; + } + /* Initialize the hardware state. */ + r300_texture_setup_immutable_state(rscreen, tex); + r300_texture_setup_fb_state(rscreen, tex); + + tex->desc.b.vtbl = &r300_texture_vtbl; + pipe_reference_init(&tex->desc.b.b.reference, 1); + tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? + R300_DOMAIN_GTT : + R300_DOMAIN_VRAM | R300_DOMAIN_GTT; + tex->buffer = buffer; + + /* Create the backing buffer if needed. */ + if (!tex->buffer) { + tex->buffer = rws->buffer_create(rws, tex->desc.size_in_bytes, 2048, + base->bind, base->usage, tex->domain); + + if (!tex->buffer) { + FREE(tex); + return NULL; + } + } + + rws->buffer_set_tiling(rws, tex->buffer, + tex->desc.microtile, tex->desc.macrotile[0], + tex->desc.stride_in_bytes[0]); + + return tex; } /* Create a new texture. */ -struct pipe_resource* r300_texture_create(struct pipe_screen* screen, - const struct pipe_resource* base) +struct pipe_resource *r300_texture_create(struct pipe_screen *screen, + const struct pipe_resource *base) { - struct r300_texture* tex = CALLOC_STRUCT(r300_texture); - struct r300_screen* rscreen = r300_screen(screen); - struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; - - if (!tex) { - return NULL; - } + struct r300_screen *rscreen = r300_screen(screen); + enum r300_buffer_tiling microtile, macrotile; /* Refuse to create a texture with size 0. */ if (!base->width0 || @@ -993,53 +745,70 @@ struct pipe_resource* r300_texture_create(struct pipe_screen* screen, fprintf(stderr, "r300: texture_create: " "Got invalid texture dimensions: %ix%ix%i\n", base->width0, base->height0, base->depth0); - FREE(tex); return NULL; } - tex->b.b = *base; - tex->b.vtbl = &r300_texture_vtbl; - pipe_reference_init(&tex->b.b.reference, 1); - tex->b.b.screen = screen; - - r300_setup_flags(tex); - if (!(base->flags & R300_RESOURCE_FLAG_TRANSFER) && - !(base->bind & PIPE_BIND_SCANOUT)) { - r300_setup_tiling(screen, tex); + if ((base->flags & R300_RESOURCE_FLAG_TRANSFER) || + (base->bind & PIPE_BIND_SCANOUT)) { + microtile = R300_BUFFER_LINEAR; + macrotile = R300_BUFFER_LINEAR; + } else { + microtile = R300_BUFFER_SELECT_LAYOUT; + macrotile = R300_BUFFER_SELECT_LAYOUT; } - r300_setup_miptree(rscreen, tex); - r300_texture_3d_fix_mipmapping(rscreen, tex); - r300_texture_setup_immutable_state(rscreen, tex); - r300_texture_setup_fb_state(rscreen, tex); - if (SCREEN_DBG_ON(rscreen, DBG_TEX)) { - r300_tex_print_info(rscreen, tex, "texture_create"); + return (struct pipe_resource*) + r300_texture_create_object(rscreen, base, microtile, macrotile, + 0, 0, NULL); +} + +struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, + const struct pipe_resource *base, + struct winsys_handle *whandle) +{ + struct r300_winsys_screen *rws = (struct r300_winsys_screen*)screen->winsys; + struct r300_screen *rscreen = r300_screen(screen); + struct r300_winsys_buffer *buffer; + enum r300_buffer_tiling microtile, macrotile; + unsigned stride, size; + + /* Support only 2D textures without mipmaps */ + if (base->target != PIPE_TEXTURE_2D || + base->depth0 != 1 || + base->last_level != 0) { + return NULL; } - tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? - R300_DOMAIN_GTT : - R300_DOMAIN_VRAM | R300_DOMAIN_GTT; + buffer = rws->buffer_from_handle(rws, whandle, &stride, &size); + if (!buffer) + return NULL; - tex->buffer = rws->buffer_create(rws, tex->size, 2048, base->bind, - base->usage, tex->domain); - tex->buffer_size = tex->size; + rws->buffer_get_tiling(rws, buffer, µtile, ¯otile); - if (!tex->buffer) { - FREE(tex); - return NULL; - } + /* Enforce a microtiled zbuffer. */ + if (util_format_is_depth_or_stencil(base->format) && + microtile == R300_BUFFER_LINEAR) { + switch (util_format_get_blocksize(base->format)) { + case 4: + microtile = R300_BUFFER_TILED; + break; - rws->buffer_set_tiling(rws, tex->buffer, - tex->microtile, tex->macrotile, - tex->pitch[0] * util_format_get_blocksize(tex->b.b.format)); + case 2: + if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) + microtile = R300_BUFFER_SQUARETILED; + break; + } + } - return (struct pipe_resource*)tex; + return (struct pipe_resource*) + r300_texture_create_object(rscreen, base, microtile, macrotile, + stride, size, buffer); } /* Not required to implement u_resource_vtbl, consider moving to another file: */ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, - struct pipe_resource* texture, + struct pipe_resource* texture, unsigned face, unsigned level, unsigned zslice, @@ -1049,7 +818,7 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, struct r300_surface* surface = CALLOC_STRUCT(r300_surface); if (surface) { - uint32_t stride, offset, tile_height; + uint32_t offset, tile_height; pipe_reference_init(&surface->base.reference, 1); pipe_resource_reference(&surface->base.texture, texture); @@ -1068,23 +837,29 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, if (surface->domain & R300_DOMAIN_VRAM) surface->domain &= ~R300_DOMAIN_GTT; - surface->offset = r300_texture_get_offset(tex, level, zslice, face); + surface->offset = r300_texture_get_offset(&tex->desc, + level, zslice, face); surface->pitch = tex->fb_state.pitch[level]; surface->format = tex->fb_state.format; /* Parameters for the CBZB clear. */ + surface->cbzb_allowed = tex->desc.cbzb_allowed[level]; surface->cbzb_width = align(surface->base.width, 64); /* Height must be aligned to the size of a tile. */ - tile_height = r300_get_pixel_alignment(tex, tex->mip_macrotile[level], + tile_height = r300_get_pixel_alignment(tex->desc.b.b.format, + tex->desc.b.b.nr_samples, + tex->desc.microtile, + tex->desc.macrotile[level], DIM_HEIGHT); + surface->cbzb_height = align((surface->base.height + 1) / 2, tile_height); /* Offset must be aligned to 2K and must point at the beginning * of a scanline. */ - stride = r300_texture_get_stride(r300_screen(screen), tex, level); - offset = surface->offset + stride * surface->cbzb_height; + offset = surface->offset + + tex->desc.stride_in_bytes[level] * surface->cbzb_height; surface->cbzb_midpoint_offset = offset & ~2047; surface->cbzb_pitch = surface->pitch & 0x1ffffc; @@ -1098,7 +873,7 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, "CBZB Dim: %ix%i, Misalignment: %i, Macro: %s\n", surface->cbzb_width, surface->cbzb_height, offset & 2047, - tex->mip_macrotile[level] ? "YES" : " NO"); + tex->desc.macrotile[level] ? "YES" : " NO"); } return &surface->base; @@ -1111,95 +886,3 @@ void r300_tex_surface_destroy(struct pipe_surface* s) pipe_resource_reference(&s->texture, NULL); FREE(s); } - -struct pipe_resource* -r300_texture_from_handle(struct pipe_screen* screen, - const struct pipe_resource* base, - struct winsys_handle *whandle) -{ - struct r300_winsys_screen *rws = (struct r300_winsys_screen*)screen->winsys; - struct r300_screen* rscreen = r300_screen(screen); - struct r300_winsys_buffer *buffer; - struct r300_texture* tex; - unsigned stride, size; - boolean override_zb_flags; - - /* Support only 2D textures without mipmaps */ - if (base->target != PIPE_TEXTURE_2D || - base->depth0 != 1 || - base->last_level != 0) { - return NULL; - } - - buffer = rws->buffer_from_handle(rws, whandle, &stride, &size); - if (!buffer) { - return NULL; - } - - tex = CALLOC_STRUCT(r300_texture); - if (!tex) { - return NULL; - } - - tex->b.b = *base; - tex->b.vtbl = &r300_texture_vtbl; - pipe_reference_init(&tex->b.b.reference, 1); - tex->b.b.screen = screen; - tex->domain = R300_DOMAIN_VRAM; - - tex->stride_override = stride; - - /* one ref already taken */ - tex->buffer = buffer; - tex->buffer_size = size; - - rws->buffer_get_tiling(rws, buffer, &tex->microtile, &tex->macrotile); - r300_setup_flags(tex); - - /* Enforce microtiled zbuffer. */ - override_zb_flags = util_format_is_depth_or_stencil(base->format) && - tex->microtile == R300_BUFFER_LINEAR; - - if (override_zb_flags) { - switch (util_format_get_blocksize(base->format)) { - case 4: - tex->microtile = R300_BUFFER_TILED; - break; - - case 2: - if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) { - tex->microtile = R300_BUFFER_SQUARETILED; - break; - } - /* Pass through. */ - - default: - override_zb_flags = FALSE; - } - } - - r300_setup_miptree(rscreen, tex); - r300_texture_setup_immutable_state(rscreen, tex); - r300_texture_setup_fb_state(rscreen, tex); - - if (override_zb_flags) { - rws->buffer_set_tiling(rws, tex->buffer, - tex->microtile, tex->macrotile, - tex->pitch[0] * util_format_get_blocksize(tex->b.b.format)); - } - - /* Make sure the buffer we got is large enough. */ - if (tex->size > tex->buffer_size) { - fprintf(stderr, "r300: texture_from_handle: The buffer is not " - "large enough. Got: %i, Need: %i, Info:\n", - tex->buffer_size, tex->size); - r300_tex_print_info(rscreen, tex, "texture_from_handle"); - pipe_resource_reference((struct pipe_resource**)&tex, NULL); - return NULL; - } else { - if (SCREEN_DBG_ON(rscreen, DBG_TEX)) - r300_tex_print_info(rscreen, tex, "texture_from_handle"); - } - - return (struct pipe_resource*)tex; -} diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 99e7694254..585036ab3b 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -39,12 +39,6 @@ uint32_t r300_translate_texformat(enum pipe_format format, uint32_t r500_tx_format_msb_bit(enum pipe_format format); -unsigned r300_texture_get_stride(struct r300_screen* screen, - struct r300_texture* tex, unsigned level); - -unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, - unsigned zslice, unsigned face); - void r300_texture_reinterpret_format(struct pipe_screen *screen, struct pipe_resource *tex, enum pipe_format new_format); diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c new file mode 100644 index 0000000000..18a2bd31fd --- /dev/null +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -0,0 +1,456 @@ +/* + * Copyright 2008 Corbin Simpson + * Copyright 2010 Marek Olšák + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_texture_desc.h" + +#include "r300_context.h" +#include "r300_winsys.h" + +#include "util/u_format.h" + +/* Returns the number of pixels that the texture should be aligned to + * in the given dimension. */ +unsigned r300_get_pixel_alignment(enum pipe_format format, + unsigned num_samples, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + enum r300_dim dim) +{ + static const unsigned table[2][5][3][2] = + { + { + /* Macro: linear linear linear + Micro: linear tiled square-tiled */ + {{ 32, 1}, { 8, 4}, { 0, 0}}, /* 8 bits per pixel */ + {{ 16, 1}, { 8, 2}, { 4, 4}}, /* 16 bits per pixel */ + {{ 8, 1}, { 4, 2}, { 0, 0}}, /* 32 bits per pixel */ + {{ 4, 1}, { 0, 0}, { 2, 2}}, /* 64 bits per pixel */ + {{ 2, 1}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ + }, + { + /* Macro: tiled tiled tiled + Micro: linear tiled square-tiled */ + {{256, 8}, {64, 32}, { 0, 0}}, /* 8 bits per pixel */ + {{128, 8}, {64, 16}, {32, 32}}, /* 16 bits per pixel */ + {{ 64, 8}, {32, 16}, { 0, 0}}, /* 32 bits per pixel */ + {{ 32, 8}, { 0, 0}, {16, 16}}, /* 64 bits per pixel */ + {{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ + } + }; + static const unsigned aa_block[2] = {4, 8}; + unsigned tile = 0; + unsigned pixsize = util_format_get_blocksize(format); + + assert(macrotile <= R300_BUFFER_TILED); + assert(microtile <= R300_BUFFER_SQUARETILED); + assert(pixsize <= 16); + assert(dim <= DIM_HEIGHT); + + if (num_samples > 1) { + /* Multisampled textures have their own alignment scheme. */ + if (pixsize == 4) + tile = aa_block[dim]; + /* XXX FP16 AA. */ + } else { + /* Standard alignment. */ + tile = table[macrotile][util_logbase2(pixsize)][microtile][dim]; + } + + assert(tile); + return tile; +} + +/* Return true if macrotiling should be enabled on the miplevel. */ +static boolean r300_texture_macro_switch(struct r300_texture_desc *desc, + unsigned level, + boolean rv350_mode, + enum r300_dim dim) +{ + unsigned tile, texdim; + + tile = r300_get_pixel_alignment(desc->b.b.format, desc->b.b.nr_samples, + desc->microtile, R300_BUFFER_TILED, dim); + if (dim == DIM_WIDTH) { + texdim = u_minify(desc->b.b.width0, level); + } else { + texdim = u_minify(desc->b.b.height0, level); + } + + /* See TX_FILTER1_n.MACRO_SWITCH. */ + if (rv350_mode) { + return texdim >= tile; + } else { + return texdim > tile; + } +} + +/** + * Return the stride, in bytes, of the texture image of the given texture + * at the given level. + */ +static unsigned r300_texture_get_stride(struct r300_screen *screen, + struct r300_texture_desc *desc, + unsigned level) +{ + unsigned tile_width, width, stride; + + if (desc->stride_in_bytes_override) + return desc->stride_in_bytes_override; + + /* Check the level. */ + if (level > desc->b.b.last_level) { + SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n", + __FUNCTION__, level, desc->b.b.last_level); + return 0; + } + + width = u_minify(desc->b.b.width0, level); + + if (util_format_is_plain(desc->b.b.format)) { + tile_width = r300_get_pixel_alignment(desc->b.b.format, + desc->b.b.nr_samples, + desc->microtile, + desc->macrotile[level], + DIM_WIDTH); + width = align(width, tile_width); + + stride = util_format_get_stride(desc->b.b.format, width); + + /* Some IGPs need a minimum stride of 64 bytes, hmm... + * This doesn't seem to apply to tiled textures, according to r300c. */ + if (!desc->microtile && !desc->macrotile[level] && + (screen->caps.family == CHIP_FAMILY_RS600 || + screen->caps.family == CHIP_FAMILY_RS690 || + screen->caps.family == CHIP_FAMILY_RS740)) { + return stride < 64 ? 64 : stride; + } + + /* The alignment to 32 bytes is sort of implied by the layout... */ + return stride; + } else { + return align(util_format_get_stride(desc->b.b.format, width), 32); + } +} + +static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, + unsigned level, + boolean align_for_cbzb) +{ + unsigned height, tile_height; + + height = u_minify(desc->b.b.height0, level); + + if (util_format_is_plain(desc->b.b.format)) { + tile_height = r300_get_pixel_alignment(desc->b.b.format, + desc->b.b.nr_samples, + desc->microtile, + desc->macrotile[level], + DIM_HEIGHT); + height = align(height, tile_height); + + /* This is needed for the kernel checker, unfortunately. */ + if ((desc->b.b.target != PIPE_TEXTURE_1D && + desc->b.b.target != PIPE_TEXTURE_2D) || + desc->b.b.last_level != 0) { + height = util_next_power_of_two(height); + } + + /* Allocate an even number of macrotiles for the CBZB clear. + * Do so for 3 or more macrotiles in the Y direction. */ + if (align_for_cbzb && + level == 0 && desc->b.b.last_level == 0 && + desc->macrotile[0] && height >= tile_height * 3) { + height = align(height, tile_height * 2); + } + } + + return util_format_get_nblocksy(desc->b.b.format, height); +} + +static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, + struct r300_texture_desc *desc) +{ + /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures + * incorrectly. This is a workaround to prevent CS from being rejected. */ + + unsigned i, size; + + if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) && + desc->b.b.target == PIPE_TEXTURE_3D && + desc->b.b.last_level > 0) { + size = 0; + + for (i = 0; i <= desc->b.b.last_level; i++) { + size += desc->stride_in_bytes[i] * + r300_texture_get_nblocksy(desc, i, FALSE); + } + + size *= desc->b.b.depth0; + desc->size_in_bytes = size; + } +} + +static void r300_setup_miptree(struct r300_screen *screen, + struct r300_texture_desc *desc) +{ + struct pipe_resource *base = &desc->b.b; + unsigned stride, size, layer_size, nblocksy, i; + boolean rv350_mode = screen->caps.is_rv350; + + SCREEN_DBG(screen, DBG_TEXALLOC, + "r300: Making miptree for texture, format %s\n", + util_format_short_name(base->format)); + + for (i = 0; i <= base->last_level; i++) { + /* Let's see if this miplevel can be macrotiled. */ + desc->macrotile[i] = + (desc->macrotile[0] == R300_BUFFER_TILED && + r300_texture_macro_switch(desc, i, rv350_mode, DIM_WIDTH) && + r300_texture_macro_switch(desc, i, rv350_mode, DIM_HEIGHT)) ? + R300_BUFFER_TILED : R300_BUFFER_LINEAR; + + stride = r300_texture_get_stride(screen, desc, i); + nblocksy = r300_texture_get_nblocksy(desc, i, desc->stride_in_bytes_override == 0); + layer_size = stride * nblocksy; + + if (base->nr_samples) { + layer_size *= base->nr_samples; + } + + if (base->target == PIPE_TEXTURE_CUBE) + size = layer_size * 6; + else + size = layer_size * u_minify(base->depth0, i); + + desc->offset_in_bytes[i] = desc->size_in_bytes; + desc->size_in_bytes = desc->offset_in_bytes[i] + size; + desc->layer_size_in_bytes[i] = layer_size; + desc->stride_in_bytes[i] = stride; + desc->stride_in_pixels[i] = + (stride / util_format_get_blocksize(base->format)) * + util_format_get_blockwidth(base->format); + + SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d " + "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", + i, u_minify(base->width0, i), u_minify(base->height0, i), + u_minify(base->depth0, i), stride, desc->size_in_bytes, + desc->macrotile[i] ? "TRUE" : "FALSE"); + } +} + +static void r300_setup_flags(struct r300_texture_desc *desc) +{ + desc->uses_stride_addressing = + !util_is_power_of_two(desc->b.b.width0) || + !util_is_power_of_two(desc->b.b.height0) || + desc->stride_in_bytes_override; +} + +static void r300_setup_cbzb_flags(struct r300_screen *rscreen, + struct r300_texture_desc *desc) +{ + unsigned i, bpp; + boolean first_level_valid; + + bpp = util_format_get_blocksizebits(desc->b.b.format); + + /* 1) The texture must be point-sampled, + * 2) The depth must be 16 or 32 bits. + * 3) If the midpoint ZB offset is not aligned to 2048, it returns garbage + * with certain texture sizes. Macrotiling ensures the alignment. */ + first_level_valid = desc->b.b.nr_samples <= 1 && + (bpp == 16 || bpp == 32) && + desc->macrotile[0]; + + for (i = 0; i <= desc->b.b.last_level; i++) + desc->cbzb_allowed[i] = first_level_valid && desc->macrotile[i]; + return; +#if 0 + /* When clearing, the layer (width*height) is horizontally split + * into two, and the upper and lower halves are cleared by the CB + * and ZB units, respectively. Therefore, the number of macrotiles + * in the Y direction must be even. */ + + if (desc->b.b.last_level > 0 || + desc->b.b.target == PIPE_TEXTURE_3D || + desc->b.b.target == PIPE_TEXTURE_CUBE) { + /* For mipmapped, 3D, or cube textures, just check if there are + * enough macrotiles per layer. */ + for (i = 0; i <= desc->b.b.last_level; i++) { + desc->cbzb_allowed[i] = FALSE; + + if (first_level_valid && desc->macrotile[i]) { + unsigned height, tile_height, num_macrotiles; + + /* Compute the number of macrotiles in the Y direction. */ + tile_height = r300_get_pixel_alignment(desc->b.b.format, + desc->b.b.nr_samples, + desc->microtile, + R300_BUFFER_TILED, + DIM_HEIGHT); + height = r300_texture_get_height(desc, i); + num_macrotiles = height / tile_height; + + desc->cbzb_allowed[i] = num_macrotiles % 2 == 0; + } + } + } else { + /* For 1D and 2D non-mipmapped textures */ + unsigned layer_size; + + layer_size = desc->stride_in_bytes[0] * + r300_texture_get_nblocksy(desc, 0, TRUE); + } +#endif +} + +static void r300_setup_tiling(struct r300_screen *screen, + struct r300_texture_desc *desc) +{ + struct r300_winsys_screen *rws = screen->rws; + enum pipe_format format = desc->b.b.format; + boolean rv350_mode = screen->caps.is_rv350; + boolean is_zb = util_format_is_depth_or_stencil(format); + boolean dbg_no_tiling = SCREEN_DBG_ON(screen, DBG_NO_TILING); + + if (!util_format_is_plain(format)) { + return; + } + + /* If height == 1, disable microtiling except for zbuffer. */ + if (!is_zb && (desc->b.b.height0 == 1 || dbg_no_tiling)) { + return; + } + + /* Set microtiling. */ + switch (util_format_get_blocksize(format)) { + case 1: + case 4: + desc->microtile = R300_BUFFER_TILED; + break; + + case 2: + case 8: + if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) { + desc->microtile = R300_BUFFER_SQUARETILED; + } + break; + } + + if (dbg_no_tiling) { + return; + } + + /* Set macrotiling. */ + if (r300_texture_macro_switch(desc, 0, rv350_mode, DIM_WIDTH) && + r300_texture_macro_switch(desc, 0, rv350_mode, DIM_HEIGHT)) { + desc->macrotile[0] = R300_BUFFER_TILED; + } +} + +static void r300_tex_print_info(struct r300_screen *rscreen, + struct r300_texture_desc *desc, + const char *func) +{ + fprintf(stderr, + "r300: %s: Macro: %s, Micro: %s, Pitch: %i, Dim: %ix%ix%i, " + "LastLevel: %i, Size: %i, Format: %s\n", + func, + desc->macrotile[0] ? "YES" : " NO", + desc->microtile ? "YES" : " NO", + desc->stride_in_pixels[0], + desc->b.b.width0, desc->b.b.height0, desc->b.b.depth0, + desc->b.b.last_level, desc->size_in_bytes, + util_format_short_name(desc->b.b.format)); +} + +boolean r300_texture_desc_init(struct r300_screen *rscreen, + struct r300_texture_desc *desc, + const struct pipe_resource *base, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + unsigned stride_in_bytes_override, + unsigned max_buffer_size) +{ + desc->b.b = *base; + desc->b.b.screen = &rscreen->screen; + + desc->stride_in_bytes_override = stride_in_bytes_override; + + r300_setup_flags(desc); + + if (microtile == R300_BUFFER_SELECT_LAYOUT || + macrotile == R300_BUFFER_SELECT_LAYOUT) { + r300_setup_tiling(rscreen, desc); + } else { + desc->microtile = microtile; + desc->macrotile[0] = macrotile; + assert(desc->b.b.last_level == 0); + } + + r300_setup_miptree(rscreen, desc); + r300_texture_3d_fix_mipmapping(rscreen, desc); + r300_setup_cbzb_flags(rscreen, desc); + + if (max_buffer_size) { + /* Make sure the buffer we got is large enough. */ + if (desc->size_in_bytes > max_buffer_size) { + fprintf(stderr, "r300: texture_from_handle: The buffer is not " + "large enough. Got: %i, Need: %i, Info:\n", + max_buffer_size, desc->size_in_bytes); + r300_tex_print_info(rscreen, desc, "texture_from_handle"); + return FALSE; + } + + desc->buffer_size_in_bytes = max_buffer_size; + } else { + desc->buffer_size_in_bytes = desc->size_in_bytes; + + } + + if (SCREEN_DBG_ON(rscreen, DBG_TEX)) + r300_tex_print_info(rscreen, desc, "texture_from_handle"); + + return TRUE; +} + +unsigned r300_texture_get_offset(struct r300_texture_desc *desc, + unsigned level, unsigned zslice, + unsigned face) +{ + unsigned offset = desc->offset_in_bytes[level]; + + switch (desc->b.b.target) { + case PIPE_TEXTURE_3D: + assert(face == 0); + return offset + zslice * desc->layer_size_in_bytes[level]; + + case PIPE_TEXTURE_CUBE: + assert(zslice == 0); + return offset + face * desc->layer_size_in_bytes[level]; + + default: + assert(zslice == 0 && face == 0); + return offset; + } +} diff --git a/src/gallium/drivers/r300/r300_texture_desc.h b/src/gallium/drivers/r300/r300_texture_desc.h new file mode 100644 index 0000000000..95de66f654 --- /dev/null +++ b/src/gallium/drivers/r300/r300_texture_desc.h @@ -0,0 +1,57 @@ +/* + * Copyright 2008 Corbin Simpson + * Copyright 2010 Marek Olšák + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_TEXTURE_DESC_H +#define R300_TEXTURE_DESC_H + +#include "r300_defines.h" + +struct pipe_resource; +struct r300_screen; +struct r300_texture_desc; +struct r300_texture; + +enum r300_dim { + DIM_WIDTH = 0, + DIM_HEIGHT = 1 +}; + +unsigned r300_get_pixel_alignment(enum pipe_format format, + unsigned num_samples, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + enum r300_dim dim); + +boolean r300_texture_desc_init(struct r300_screen *rscreen, + struct r300_texture_desc *desc, + const struct pipe_resource *base, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + unsigned stride_in_bytes_override, + unsigned max_buffer_size); + +unsigned r300_texture_get_offset(struct r300_texture_desc *desc, + unsigned level, unsigned zslice, + unsigned face); + +#endif diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index 3cc4c8c958..e9333b35ef 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -22,7 +22,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r300_transfer.h" -#include "r300_texture.h" +#include "r300_texture_desc.h" #include "r300_screen_buffer.h" #include "util/u_memory.h" @@ -35,8 +35,8 @@ struct r300_transfer { /* Offset from start of buffer. */ unsigned offset; - /* Detiled texture. */ - struct r300_texture *detiled_texture; + /* Linear texture. */ + struct r300_texture *linear_texture; }; /* Convenience cast wrapper. */ @@ -57,7 +57,7 @@ static void r300_copy_from_tiled_texture(struct pipe_context *ctx, subdst.face = 0; subdst.level = 0; - ctx->resource_copy_region(ctx, &r300transfer->detiled_texture->b.b, subdst, + ctx->resource_copy_region(ctx, &r300transfer->linear_texture->desc.b.b, subdst, 0, 0, 0, tex, transfer->sr, transfer->box.x, transfer->box.y, transfer->box.z, @@ -77,7 +77,7 @@ static void r300_copy_into_tiled_texture(struct pipe_context *ctx, ctx->resource_copy_region(ctx, tex, transfer->sr, transfer->box.x, transfer->box.y, transfer->box.z, - &r300transfer->detiled_texture->b.b, subsrc, + &r300transfer->linear_texture->desc.b.b, subsrc, 0, 0, 0, transfer->box.width, transfer->box.height); @@ -93,7 +93,6 @@ r300_texture_get_transfer(struct pipe_context *ctx, { struct r300_context *r300 = r300_context(ctx); struct r300_texture *tex = r300_texture(texture); - struct r300_screen *r300screen = r300_screen(ctx->screen); struct r300_transfer *trans; struct pipe_resource base; boolean referenced_cs, referenced_hw, blittable; @@ -124,7 +123,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, /* If the texture is tiled, we must create a temporary detiled texture * for this transfer. * Also make write transfers pipelined. */ - if (tex->microtile || tex->macrotile || + if (tex->desc.microtile || tex->desc.macrotile[sr.level] || ((referenced_hw & !(usage & PIPE_TRANSFER_READ)) && blittable)) { base.target = PIPE_TEXTURE_2D; base.format = texture->format; @@ -149,23 +148,23 @@ r300_texture_get_transfer(struct pipe_context *ctx, } /* Create the temporary texture. */ - trans->detiled_texture = r300_texture( + trans->linear_texture = r300_texture( ctx->screen->resource_create(ctx->screen, &base)); - if (!trans->detiled_texture) { + if (!trans->linear_texture) { /* Oh crap, the thing can't create the texture. * Let's flush and try again. */ ctx->flush(ctx, 0, NULL); - trans->detiled_texture = r300_texture( + trans->linear_texture = r300_texture( ctx->screen->resource_create(ctx->screen, &base)); - if (!trans->detiled_texture) { + if (!trans->linear_texture) { /* For linear textures, it's safe to fallback to * an unpipelined transfer. */ - if (!tex->microtile && !tex->macrotile) { + if (!tex->desc.microtile && !tex->desc.macrotile[sr.level]) { goto unpipelined; } @@ -177,8 +176,8 @@ r300_texture_get_transfer(struct pipe_context *ctx, } } - assert(!trans->detiled_texture->microtile && - !trans->detiled_texture->macrotile); + assert(!trans->linear_texture->desc.microtile && + !trans->linear_texture->desc.macrotile[0]); /* Set the stride. * @@ -188,7 +187,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, * right thing internally. */ trans->transfer.stride = - r300_texture_get_stride(r300screen, trans->detiled_texture, 0); + trans->linear_texture->desc.stride_in_bytes[0]; if (usage & PIPE_TRANSFER_READ) { /* We cannot map a tiled texture directly because the data is @@ -203,9 +202,9 @@ r300_texture_get_transfer(struct pipe_context *ctx, unpipelined: /* Unpipelined transfer. */ - trans->transfer.stride = - r300_texture_get_stride(r300screen, tex, sr.level); - trans->offset = r300_texture_get_offset(tex, sr.level, box->z, sr.face); + trans->transfer.stride = tex->desc.stride_in_bytes[sr.level]; + trans->offset = r300_texture_get_offset(&tex->desc, + sr.level, box->z, sr.face); if (referenced_cs) ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); @@ -219,13 +218,13 @@ void r300_texture_transfer_destroy(struct pipe_context *ctx, { struct r300_transfer *r300transfer = r300_transfer(trans); - if (r300transfer->detiled_texture) { + if (r300transfer->linear_texture) { if (trans->usage & PIPE_TRANSFER_WRITE) { r300_copy_into_tiled_texture(ctx, r300transfer); } pipe_resource_reference( - (struct pipe_resource**)&r300transfer->detiled_texture, NULL); + (struct pipe_resource**)&r300transfer->linear_texture, NULL); } pipe_resource_reference(&trans->resource, NULL); FREE(trans); @@ -239,13 +238,13 @@ void* r300_texture_transfer_map(struct pipe_context *ctx, struct r300_transfer *r300transfer = r300_transfer(transfer); struct r300_texture *tex = r300_texture(transfer->resource); char *map; - enum pipe_format format = tex->b.b.format; + enum pipe_format format = tex->desc.b.b.format; - if (r300transfer->detiled_texture) { + if (r300transfer->linear_texture) { /* The detiled texture is of the same size as the region being mapped * (no offset needed). */ return rws->buffer_map(rws, - r300transfer->detiled_texture->buffer, + r300transfer->linear_texture->buffer, r300->cs, transfer->usage); } else { @@ -270,8 +269,8 @@ void r300_texture_transfer_unmap(struct pipe_context *ctx, struct r300_transfer *r300transfer = r300_transfer(transfer); struct r300_texture *tex = r300_texture(transfer->resource); - if (r300transfer->detiled_texture) { - rws->buffer_unmap(rws, r300transfer->detiled_texture->buffer); + if (r300transfer->linear_texture) { + rws->buffer_unmap(rws, r300transfer->linear_texture->buffer); } else { rws->buffer_unmap(rws, tex->buffer); } -- cgit v1.2.3 From c92d232061c1aef6f5f56cbd815625778db2fd8c Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 24 Jul 2010 22:52:01 +0200 Subject: r300g: do not use TXPITCH_EN for power-of-two textures from the DDX We were using TXPITCH_EN for textures from the DDX since ever, for nothing. --- src/gallium/drivers/r300/r300_texture_desc.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index 18a2bd31fd..becaa59bea 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -210,6 +210,14 @@ static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, } } +/* Get a width in pixels from a stride in bytes. */ +static unsigned stride_to_width(enum pipe_format format, + unsigned stride_in_bytes) +{ + return (stride_in_bytes / util_format_get_blocksize(format)) * + util_format_get_blockwidth(format); +} + static void r300_setup_miptree(struct r300_screen *screen, struct r300_texture_desc *desc) { @@ -246,9 +254,7 @@ static void r300_setup_miptree(struct r300_screen *screen, desc->size_in_bytes = desc->offset_in_bytes[i] + size; desc->layer_size_in_bytes[i] = layer_size; desc->stride_in_bytes[i] = stride; - desc->stride_in_pixels[i] = - (stride / util_format_get_blocksize(base->format)) * - util_format_get_blockwidth(base->format); + desc->stride_in_pixels[i] = stride_to_width(desc->b.b.format, stride); SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d " "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", @@ -261,9 +267,11 @@ static void r300_setup_miptree(struct r300_screen *screen, static void r300_setup_flags(struct r300_texture_desc *desc) { desc->uses_stride_addressing = - !util_is_power_of_two(desc->b.b.width0) || - !util_is_power_of_two(desc->b.b.height0) || - desc->stride_in_bytes_override; + !util_is_power_of_two(desc->b.b.width0) || + !util_is_power_of_two(desc->b.b.height0) || + (desc->stride_in_bytes_override && + stride_to_width(desc->b.b.format, + desc->stride_in_bytes_override) != desc->b.b.width0); } static void r300_setup_cbzb_flags(struct r300_screen *rscreen, -- cgit v1.2.3 From 49330fc5ac13e25cb201e62995329cffaf5046f0 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 24 Jul 2010 23:05:40 +0200 Subject: r300g: do not use TXPITCH_EN if the width is POT and the height is NPOT --- src/gallium/drivers/r300/r300_context.h | 13 ++++++------- src/gallium/drivers/r300/r300_fs.c | 2 +- src/gallium/drivers/r300/r300_state.c | 2 +- src/gallium/drivers/r300/r300_state_derived.c | 2 +- src/gallium/drivers/r300/r300_texture.c | 3 ++- src/gallium/drivers/r300/r300_texture_desc.c | 4 ++++ 6 files changed, 15 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 06e4e12558..b4256c6278 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -361,13 +361,12 @@ struct r300_texture_desc { */ unsigned stride_in_bytes_override; - /* Whether this texture has non-power-of-two dimensions - * or a user-specified stride. - * It can be either a regular texture or a rectangle one. - * - * This flag says that hardware must use the stride for addressing - * instead of the width. - */ + /* Whether this texture has non-power-of-two dimensions. + * It can be either a regular texture or a rectangle one. */ + boolean is_npot; + + /* This flag says that hardware must use the stride for addressing + * instead of the width. */ boolean uses_stride_addressing; /* Whether CBZB fast color clear is allowed on the miplevel. */ diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 6eac12bfb9..db5269912e 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -173,7 +173,7 @@ static void get_external_state( t = (struct r300_texture*)texstate->sampler_views[i]->base.texture; /* XXX this should probably take into account STR, not just S. */ - if (t->desc.uses_stride_addressing) { + if (t->desc.is_npot) { switch (s->state.wrap_s) { case PIPE_TEX_WRAP_REPEAT: state->unit[i].wrap_mode = RC_WRAP_REPEAT; diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 6e2a6ca0e4..bbea7e1589 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1296,7 +1296,7 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, /* Set the texrect factor in the fragment shader. * Needed for RECT and NPOT fallback. */ texture = r300_texture(views[i]->texture); - if (texture->desc.uses_stride_addressing) { + if (texture->desc.is_npot) { r300->fs_rc_constant_state.dirty = TRUE; } diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index e20d8d0fdf..48912e1555 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -583,7 +583,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) texstate->filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); } - if (tex->desc.uses_stride_addressing) { + if (tex->desc.is_npot) { /* NPOT textures don't support mip filter, unfortunately. * This prevents incorrect rendering. */ texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK; diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index e99a4630ee..f1118dfd7d 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -552,7 +552,8 @@ static void r300_texture_setup_immutable_state(struct r300_screen* screen, f->format0 |= R300_TX_PITCH_EN; f->format2 = (tex->desc.stride_in_pixels[0] - 1) & 0x1fff; } else { - /* power of two textures (3D, mipmaps, and no pitch) */ + /* Power of two textures (3D, mipmaps, and no pitch), + * also NPOT textures with a width being POT. */ f->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth0) & 0xf); } diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index becaa59bea..02591aa01f 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -272,6 +272,10 @@ static void r300_setup_flags(struct r300_texture_desc *desc) (desc->stride_in_bytes_override && stride_to_width(desc->b.b.format, desc->stride_in_bytes_override) != desc->b.b.width0); + + desc->is_npot = + desc->uses_stride_addressing || + !util_is_power_of_two(desc->b.b.height0); } static void r300_setup_cbzb_flags(struct r300_screen *rscreen, -- cgit v1.2.3 From 451a0ddb190e5185372fed9ec57d24a822442ecc Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 25 Jul 2010 10:07:12 +0200 Subject: r300g: make sure a texture is large enough for the CBZB clear The number of macrotiles in the Y direction must be even, otherwise memory corruption may happen (e.g. broken fonts). Basically, if we get a buffer in resource_from_handle, we can determine from the buffer size whether it's safe to use the CBZB clear or not. --- src/gallium/drivers/r300/r300_texture_desc.c | 101 +++++++++++++-------------- 1 file changed, 49 insertions(+), 52 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index 02591aa01f..343089bf2c 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -154,7 +154,7 @@ static unsigned r300_texture_get_stride(struct r300_screen *screen, static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, unsigned level, - boolean align_for_cbzb) + boolean *out_aligned_for_cbzb) { unsigned height, tile_height; @@ -175,12 +175,28 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, height = util_next_power_of_two(height); } - /* Allocate an even number of macrotiles for the CBZB clear. - * Do so for 3 or more macrotiles in the Y direction. */ - if (align_for_cbzb && - level == 0 && desc->b.b.last_level == 0 && - desc->macrotile[0] && height >= tile_height * 3) { - height = align(height, tile_height * 2); + /* See if the CBZB clear can be used on the buffer, + * taking the texture size into account. */ + if (out_aligned_for_cbzb) { + if (desc->macrotile[level]) { + /* When clearing, the layer (width*height) is horizontally split + * into two, and the upper and lower halves are cleared by the CB + * and ZB units, respectively. Therefore, the number of macrotiles + * in the Y direction must be even. */ + + /* Align the height so that there is an even number of macrotiles. + * Do so for 3 or more macrotiles in the Y direction. */ + if (level == 0 && desc->b.b.last_level == 0 && + (desc->b.b.target == PIPE_TEXTURE_1D || + desc->b.b.target == PIPE_TEXTURE_2D) && + height >= tile_height * 3) { + height = align(height, tile_height * 2); + } + + *out_aligned_for_cbzb = height % (tile_height * 2) == 0; + } else { + *out_aligned_for_cbzb = FALSE; + } } } @@ -219,11 +235,15 @@ static unsigned stride_to_width(enum pipe_format format, } static void r300_setup_miptree(struct r300_screen *screen, - struct r300_texture_desc *desc) + struct r300_texture_desc *desc, + boolean align_for_cbzb) { struct pipe_resource *base = &desc->b.b; unsigned stride, size, layer_size, nblocksy, i; boolean rv350_mode = screen->caps.is_rv350; + boolean aligned_for_cbzb; + + desc->size_in_bytes = 0; SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Making miptree for texture, format %s\n", @@ -238,7 +258,15 @@ static void r300_setup_miptree(struct r300_screen *screen, R300_BUFFER_TILED : R300_BUFFER_LINEAR; stride = r300_texture_get_stride(screen, desc, i); - nblocksy = r300_texture_get_nblocksy(desc, i, desc->stride_in_bytes_override == 0); + + /* Compute the number of blocks in Y, see if the CBZB clear can be + * used on the texture. */ + aligned_for_cbzb = FALSE; + if (align_for_cbzb && desc->cbzb_allowed[i]) + nblocksy = r300_texture_get_nblocksy(desc, i, &aligned_for_cbzb); + else + nblocksy = r300_texture_get_nblocksy(desc, i, NULL); + layer_size = stride * nblocksy; if (base->nr_samples) { @@ -255,6 +283,7 @@ static void r300_setup_miptree(struct r300_screen *screen, desc->layer_size_in_bytes[i] = layer_size; desc->stride_in_bytes[i] = stride; desc->stride_in_pixels[i] = stride_to_width(desc->b.b.format, stride); + desc->cbzb_allowed[i] = desc->cbzb_allowed[i] && aligned_for_cbzb; SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d " "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", @@ -296,44 +325,6 @@ static void r300_setup_cbzb_flags(struct r300_screen *rscreen, for (i = 0; i <= desc->b.b.last_level; i++) desc->cbzb_allowed[i] = first_level_valid && desc->macrotile[i]; - return; -#if 0 - /* When clearing, the layer (width*height) is horizontally split - * into two, and the upper and lower halves are cleared by the CB - * and ZB units, respectively. Therefore, the number of macrotiles - * in the Y direction must be even. */ - - if (desc->b.b.last_level > 0 || - desc->b.b.target == PIPE_TEXTURE_3D || - desc->b.b.target == PIPE_TEXTURE_CUBE) { - /* For mipmapped, 3D, or cube textures, just check if there are - * enough macrotiles per layer. */ - for (i = 0; i <= desc->b.b.last_level; i++) { - desc->cbzb_allowed[i] = FALSE; - - if (first_level_valid && desc->macrotile[i]) { - unsigned height, tile_height, num_macrotiles; - - /* Compute the number of macrotiles in the Y direction. */ - tile_height = r300_get_pixel_alignment(desc->b.b.format, - desc->b.b.nr_samples, - desc->microtile, - R300_BUFFER_TILED, - DIM_HEIGHT); - height = r300_texture_get_height(desc, i); - num_macrotiles = height / tile_height; - - desc->cbzb_allowed[i] = num_macrotiles % 2 == 0; - } - } - } else { - /* For 1D and 2D non-mipmapped textures */ - unsigned layer_size; - - layer_size = desc->stride_in_bytes[0] * - r300_texture_get_nblocksy(desc, 0, TRUE); - } -#endif } static void r300_setup_tiling(struct r300_screen *screen, @@ -409,8 +400,6 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen, desc->stride_in_bytes_override = stride_in_bytes_override; - r300_setup_flags(desc); - if (microtile == R300_BUFFER_SELECT_LAYOUT || macrotile == R300_BUFFER_SELECT_LAYOUT) { r300_setup_tiling(rscreen, desc); @@ -420,10 +409,19 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen, assert(desc->b.b.last_level == 0); } - r300_setup_miptree(rscreen, desc); - r300_texture_3d_fix_mipmapping(rscreen, desc); + r300_setup_flags(desc); r300_setup_cbzb_flags(rscreen, desc); + /* Setup the miptree description. */ + r300_setup_miptree(rscreen, desc, TRUE); + /* If the required buffer size is larger the given max size, + * try again without the alignment for the CBZB clear. */ + if (max_buffer_size && desc->size_in_bytes > max_buffer_size) { + r300_setup_miptree(rscreen, desc, FALSE); + } + + r300_texture_3d_fix_mipmapping(rscreen, desc); + if (max_buffer_size) { /* Make sure the buffer we got is large enough. */ if (desc->size_in_bytes > max_buffer_size) { @@ -437,7 +435,6 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen, desc->buffer_size_in_bytes = max_buffer_size; } else { desc->buffer_size_in_bytes = desc->size_in_bytes; - } if (SCREEN_DBG_ON(rscreen, DBG_TEX)) -- cgit v1.2.3 From 6f2936c654c68388b9c43a189a1b8c06f3a9d241 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 25 Jul 2010 23:40:51 +0200 Subject: r300g: implement D24X8 texture sampling for r3xx-r4xx Because the hw can't sample it, I reinterpret the format as G16R16 and sample the G component. This gives 16 bits of precision, which should be enough for depth texturing (surprisingly, the sampled values are exactly the same as in D16 textures). This also enables EXT_packed_depth_stencil on those old chipsets, finally. --- src/gallium/drivers/r300/r300_screen.c | 4 ---- src/gallium/drivers/r300/r300_state.c | 6 +++-- src/gallium/drivers/r300/r300_state_derived.c | 33 +++++++++++++++++---------- src/gallium/drivers/r300/r300_texture.c | 10 +++++--- src/gallium/drivers/r300/r300_texture.h | 3 ++- 5 files changed, 34 insertions(+), 22 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 5a11b98eb6..676430f5fe 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -257,8 +257,6 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, uint32_t retval = 0; boolean is_r500 = r300_screen(screen)->caps.is_r500; boolean is_r400 = r300_screen(screen)->caps.is_r400; - boolean is_z24 = format == PIPE_FORMAT_X8Z24_UNORM || - format == PIPE_FORMAT_S8_USCALED_Z24_UNORM; boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM || format == PIPE_FORMAT_R10G10B10X2_SNORM || format == PIPE_FORMAT_B10G10R10A2_UNORM || @@ -293,8 +291,6 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, /* Check sampler format support. */ if ((usage & PIPE_BIND_SAMPLER_VIEW) && - /* Z24 cannot be sampled from on non-r5xx. */ - (is_r500 || !is_z24) && /* ATI1N is r5xx-only. */ (is_r500 || !is_ati1n) && /* ATI2N is supported on r4xx-r5xx. */ diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index bbea7e1589..3e221f2e02 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1330,6 +1330,7 @@ r300_create_sampler_view(struct pipe_context *pipe, { struct r300_sampler_view *view = CALLOC_STRUCT(r300_sampler_view); struct r300_texture *tex = r300_texture(texture); + boolean is_r500 = r300_screen(pipe->screen)->caps.is_r500; if (view) { view->base = *templ; @@ -1345,8 +1346,9 @@ r300_create_sampler_view(struct pipe_context *pipe, view->format = tex->tx_format; view->format.format1 |= r300_translate_texformat(templ->format, - view->swizzle); - if (r300_screen(pipe->screen)->caps.is_r500) { + view->swizzle, + is_r500); + if (is_r500) { view->format.format2 |= r500_tx_format_msb_bit(templ->format); } } diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 48912e1555..a85db27064 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -528,15 +528,9 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) struct r300_sampler_state *sampler; struct r300_sampler_view *view; struct r300_texture *tex; - unsigned min_level, max_level, i, size; + unsigned min_level, max_level, i, j, size; unsigned count = MIN2(state->sampler_view_count, state->sampler_state_count); - unsigned char depth_swizzle[4] = { - UTIL_FORMAT_SWIZZLE_X, - UTIL_FORMAT_SWIZZLE_X, - UTIL_FORMAT_SWIZZLE_X, - UTIL_FORMAT_SWIZZLE_X - }; /* The KIL opcode fix, see below. */ if (!count && !r300->screen->caps.is_r500) @@ -563,14 +557,29 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) /* Assign a texture cache region. */ texstate->format.format1 |= view->texcache_region; - /* If compare mode is disabled, the sampler view swizzles - * are stored in the format. - * Otherwise, swizzles must be applied after the compare mode - * in the fragment shader. */ + /* Depth textures are kinda special. */ if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) { + unsigned char depth_swizzle[4]; + + if (!r300->screen->caps.is_r500 && + util_format_get_blocksizebits(tex->desc.b.b.format) == 32) { + /* X24x8 is sampled as Y16X16 on r3xx-r4xx. + * The depth here is at the Y component. */ + for (j = 0; j < 4; j++) + depth_swizzle[j] = UTIL_FORMAT_SWIZZLE_Y; + } else { + for (j = 0; j < 4; j++) + depth_swizzle[j] = UTIL_FORMAT_SWIZZLE_X; + } + + /* If compare mode is disabled, sampler view swizzles + * are stored in the format. + * Otherwise, the swizzles must be applied after the compare + * mode in the fragment shader. */ if (sampler->state.compare_mode == PIPE_TEX_COMPARE_NONE) { texstate->format.format1 |= - r300_get_swizzle_combined(depth_swizzle, view->swizzle); + r300_get_swizzle_combined(depth_swizzle, + view->swizzle); } else { texstate->format.format1 |= r300_get_swizzle_combined(depth_swizzle, 0); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index f1118dfd7d..fcdca5605e 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -105,7 +105,8 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, * The FORMAT specifies how the texture sampler will treat the texture, and * makes available X, Y, Z, W, ZERO, and ONE for swizzling. */ uint32_t r300_translate_texformat(enum pipe_format format, - const unsigned char *swizzle_view) + const unsigned char *swizzle_view, + boolean is_r500) { uint32_t result = 0; const struct util_format_description *desc; @@ -130,7 +131,10 @@ uint32_t r300_translate_texformat(enum pipe_format format, return R300_TX_FORMAT_X16; case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - return R500_TX_FORMAT_Y8X24; + if (is_r500) + return R500_TX_FORMAT_Y8X24; + else + return R300_TX_FORMAT_Y16X16; default: return ~0; /* Unsupported. */ } @@ -533,7 +537,7 @@ boolean r300_is_zs_format_supported(enum pipe_format format) boolean r300_is_sampler_format_supported(enum pipe_format format) { - return r300_translate_texformat(format, 0) != ~0; + return r300_translate_texformat(format, 0, TRUE) != ~0; } static void r300_texture_setup_immutable_state(struct r300_screen* screen, diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 585036ab3b..a4524320fd 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -35,7 +35,8 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, const unsigned char *swizzle_view); uint32_t r300_translate_texformat(enum pipe_format format, - const unsigned char *swizzle_view); + const unsigned char *swizzle_view, + boolean is_r500); uint32_t r500_tx_format_msb_bit(enum pipe_format format); -- cgit v1.2.3 From 8c26dc2bfeaa2504d6bcc31caa200299d47772b8 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 26 Jul 2010 11:56:12 +1000 Subject: r300g: fix macro substitution problem isn't a problem yet, but have issues in hiz branch. Signed-off-by: Dave Airlie --- src/gallium/drivers/r300/r300_cs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 3beb625d43..c194d6a1b0 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -136,8 +136,8 @@ #define WRITE_CS_TABLE(values, count) do { \ CS_DEBUG(assert(cs_count == 0);) \ - memcpy(cs_copy->ptr + cs_copy->cdw, values, count * 4); \ - cs_copy->cdw += count; \ + memcpy(cs_copy->ptr + cs_copy->cdw, (values), (count) * 4); \ + cs_copy->cdw += (count); \ } while (0) #endif /* R300_CS_H */ -- cgit v1.2.3 From 0bebdc230ff09f191cfa269c2cbcbb257fd2e0fc Mon Sep 17 00:00:00 2001 From: Stephan Schmid Date: Mon, 26 Jul 2010 07:52:12 +0200 Subject: r600g: implememt the LIT instruction --- src/gallium/drivers/r600/r600_shader.c | 120 ++++++++++++++++++++++++++++++++- 1 file changed, 119 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index e865f013f7..e5e6786fd0 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -583,6 +583,124 @@ static int tgsi_slt(struct r600_shader_ctx *ctx) return 0; } +static int tgsi_lit(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + + int r; + + + if (inst->Dst[0].Register.WriteMask & (1 << 0)) + { + /* dst.x, <- 1.0 */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.src[0].sel = 249; /*1.0*/ + alu.src[0].chan = 0; + r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); + if (r) + return r; + if ((inst->Dst[0].Register.WriteMask & 0xe) == 0) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + + if (inst->Dst[0].Register.WriteMask & (1 << 1)) + { + /* dst.y = max(src.x, 0.0) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX; + r = tgsi_src(ctx, &inst->Src[0], 0, &alu.src[0]); + if (r) + return r; + alu.src[1].sel = 248; /*0.0*/ + alu.src[1].chan = 0; + r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); + if (r) + return r; + if ((inst->Dst[0].Register.WriteMask & 0xa) == 0) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + if (inst->Dst[0].Register.WriteMask & (1 << 3)) + { + /* dst.w, <- 1.0 */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.src[0].sel = 249; + alu.src[0].chan = 0; + r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); + if (r) + return r; + if ((inst->Dst[0].Register.WriteMask & 0x4) == 0) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + if (inst->Dst[0].Register.WriteMask & (1 << 2)) + { + /* dst.z = log(src.y) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED; + r = tgsi_src(ctx, &inst->Src[0], 1, &alu.src[0]); + if (r) + return r; + r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); + if (r) + return r; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + int chan = alu.dst.chan; + int sel = alu.dst.sel; + + /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT; + r = tgsi_src(ctx, &inst->Src[0], 3, &alu.src[0]); + if (r) + return r; + alu.src[1].sel = sel; + alu.src[1].chan = chan; + r = tgsi_src(ctx, &inst->Src[0], 0, &alu.src[2]); + if (r) + return r; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 0; + alu.dst.write = 1; + alu.is_op3 = 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* dst.z = exp(tmp.x) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); + if (r) + return r; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + static int tgsi_trans(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -735,7 +853,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, - {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans}, {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, -- cgit v1.2.3 From d2c714627ca490b23639ba4c32603f56b9d993e3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 26 Jul 2010 20:48:46 -0600 Subject: cell: added const qualifier --- src/gallium/drivers/cell/ppu/cell_context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 07b6eebc69..dc46e59a2d 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -154,7 +154,7 @@ struct cell_context struct vertex_info vertex_info; /** Mapped constant buffers */ - void *mapped_constants[PIPE_SHADER_TYPES]; + const void *mapped_constants[PIPE_SHADER_TYPES]; PIPE_ALIGN_VAR(16) struct cell_spu_function_info spu_functions; -- cgit v1.2.3 From 2a8021667de68c6ee2723b52a27dd980c1ca5602 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 26 Jul 2010 20:49:09 -0600 Subject: cell: comment-out unneeded padding field --- src/gallium/drivers/cell/common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index bbb112fd33..a8cdde34aa 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -230,7 +230,7 @@ struct cell_command_rasterizer { opcode_t opcode; /**< CELL_CMD_STATE_RASTERIZER */ struct pipe_rasterizer_state rasterizer; - uint32_t pad[1]; + /*uint32_t pad[1];*/ }; -- cgit v1.2.3 From 0315c00f5897eb443254323be2b7b5ca213fbef2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 26 Jul 2010 20:49:35 -0600 Subject: cell: fix segfault when freeing samplers --- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index 03f84d295b..223adda48f 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -281,7 +281,7 @@ cell_set_fragment_sampler_views(struct pipe_context *pipe, struct pipe_resource *new_tex = new_view ? new_view->texture : NULL; pipe_sampler_view_reference(&cell->fragment_sampler_views[i], - views[i]); + new_view); pipe_resource_reference((struct pipe_resource **) &cell->texture[i], (struct pipe_resource *) new_tex); -- cgit v1.2.3 From dd0cf2e1f5d45337f7e5870ed957ac70f364ba52 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 26 Jul 2010 20:49:54 -0600 Subject: cell: make functions static --- src/gallium/drivers/cell/ppu/cell_state_vertex.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c index 9510ea9ac2..69152b6cbf 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -36,7 +36,7 @@ #include "draw/draw_context.h" -void * +static void * cell_create_vertex_elements_state(struct pipe_context *pipe, unsigned count, const struct pipe_vertex_element *attribs) @@ -51,7 +51,7 @@ cell_create_vertex_elements_state(struct pipe_context *pipe, return velems; } -void +static void cell_bind_vertex_elements_state(struct pipe_context *pipe, void *velems) { @@ -66,7 +66,7 @@ cell_bind_vertex_elements_state(struct pipe_context *pipe, draw_set_vertex_elements(cell->draw, cell_velems->count, cell_velems->velem); } -void +static void cell_delete_vertex_elements_state(struct pipe_context *pipe, void *velems) { FREE( velems ); -- cgit v1.2.3 From fef5ae949fd71419bb2a25f0944f0b50e60a668a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 26 Jul 2010 20:50:17 -0600 Subject: cell: comment-out unused fields, functions --- src/gallium/drivers/cell/ppu/cell_fence.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_fence.c b/src/gallium/drivers/cell/ppu/cell_fence.c index 34ca864155..e7c9fc46d9 100644 --- a/src/gallium/drivers/cell/ppu/cell_fence.c +++ b/src/gallium/drivers/cell/ppu/cell_fence.c @@ -87,6 +87,7 @@ struct cell_buffer_node }; +#if 0 static void cell_add_buffer_to_list(struct cell_context *cell, struct cell_buffer_list *list, @@ -100,6 +101,7 @@ cell_add_buffer_to_list(struct cell_context *cell, list->head = node; } } +#endif /** @@ -113,7 +115,7 @@ cell_free_fenced_buffers(struct cell_context *cell, struct cell_buffer_list *list) { if (list->head) { - struct pipe_screen *ps = cell->pipe.screen; + /*struct pipe_screen *ps = cell->pipe.screen;*/ struct cell_buffer_node *node; cell_fence_finish(cell, &list->fence); @@ -146,7 +148,7 @@ cell_free_fenced_buffers(struct cell_context *cell, void cell_add_fenced_textures(struct cell_context *cell) { - struct cell_buffer_list *list = &cell->fenced_buffers[cell->cur_batch]; + /*struct cell_buffer_list *list = &cell->fenced_buffers[cell->cur_batch];*/ uint i; for (i = 0; i < cell->num_textures; i++) { -- cgit v1.2.3 From 4fbffb7d909f9746fb744e133563c80c66574adb Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 27 Jul 2010 13:09:12 +0100 Subject: llvmpipe: Use lp_build_select_bitwise() where appropriate. Fixes fdo 29269. --- src/gallium/drivers/llvmpipe/lp_bld_depth.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index e05bbe5011..99a768afd8 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -258,16 +258,16 @@ lp_build_stencil_op_single(struct lp_build_context *bld, } if (stencil->writemask != stencilMax) { - /* compute res = (res & mask) | (stencilVals & ~mask) */ - LLVMValueRef mask = lp_build_const_int_vec(type, stencil->writemask); - LLVMValueRef cmask = LLVMBuildNot(bld->builder, mask, "notWritemask"); - LLVMValueRef t1 = LLVMBuildAnd(bld->builder, res, mask, "t1"); - LLVMValueRef t2 = LLVMBuildAnd(bld->builder, stencilVals, cmask, "t2"); - res = LLVMBuildOr(bld->builder, t1, t2, "t1_or_t2"); + /* mask &= stencil->writemask */ + LLVMValueRef writemask = lp_build_const_int_vec(type, stencil->writemask); + mask = LLVMBuildAnd(bld->builder, mask, writemask, ""); + /* res = (res & mask) | (stencilVals & ~mask) */ + res = lp_build_select_bitwise(bld, writemask, res, stencilVals); + } + else { + /* res = mask ? res : stencilVals */ + res = lp_build_select(bld, mask, res, stencilVals); } - - /* only the update the vector elements enabled by 'mask' */ - res = lp_build_select(bld, mask, res, stencilVals); return res; } @@ -662,9 +662,9 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, } /* Mix the old and new Z buffer values. - * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i] + * z_dst[i] = (zselectmask[i] & z_src[i]) | (~zselectmask[i] & z_dst[i]) */ - z_dst = lp_build_select(&bld, zselectmask, z_src, z_dst); + z_dst = lp_build_select_bitwise(&bld, zselectmask, z_src, z_dst); } if (stencil[0].enabled) { -- cgit v1.2.3 From 2b3b76a4a0e21eb4bd4f1a4da5ff6ed26ccbabd1 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Tue, 27 Jul 2010 13:48:49 -0400 Subject: r600g: always emit literal after emiting an alu instruction Make sure we always fill in the literal after alu instruction. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_shader.c | 61 ++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 29 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index e5e6786fd0..dcedf56c11 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -34,6 +34,30 @@ #include #include + +struct r600_shader_tgsi_instruction; + +struct r600_shader_ctx { + struct tgsi_shader_info info; + struct tgsi_parse_context parse; + const struct tgsi_token *tokens; + unsigned type; + unsigned file_offset[TGSI_FILE_COUNT]; + unsigned temp_reg; + struct r600_shader_tgsi_instruction *inst_info; + struct r600_bc *bc; + struct r600_shader *shader; + u32 value[4]; +}; + +struct r600_shader_tgsi_instruction { + unsigned tgsi_opcode; + unsigned is_op3; + unsigned r600_opcode; + int (*process)(struct r600_shader_ctx *ctx); +}; + +static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]; static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader) @@ -216,29 +240,6 @@ int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *r return r600_pipe_shader(ctx, rpshader); } -struct r600_shader_tgsi_instruction; - -struct r600_shader_ctx { - struct tgsi_shader_info info; - struct tgsi_parse_context parse; - const struct tgsi_token *tokens; - unsigned type; - unsigned file_offset[TGSI_FILE_COUNT]; - unsigned temp_reg; - struct r600_shader_tgsi_instruction *inst_info; - struct r600_bc *bc; - struct r600_shader *shader; -}; - -struct r600_shader_tgsi_instruction { - unsigned tgsi_opcode; - unsigned is_op3; - unsigned r600_opcode; - int (*process)(struct r600_shader_ctx *ctx); -}; - -static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]; - static int tgsi_is_supported(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; @@ -334,7 +335,6 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s struct r600_bc_output output; unsigned opcode; int i, r = 0, pos0; - u32 value[4]; ctx.bc = &shader->bc; ctx.shader = shader; @@ -380,10 +380,10 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s switch (ctx.parse.FullToken.Token.Type) { case TGSI_TOKEN_TYPE_IMMEDIATE: immediate = &ctx.parse.FullToken.FullImmediate; - value[0] = immediate->u[0].Uint; - value[1] = immediate->u[1].Uint; - value[2] = immediate->u[2].Uint; - value[3] = immediate->u[3].Uint; + ctx.value[0] = immediate->u[0].Uint; + ctx.value[1] = immediate->u[1].Uint; + ctx.value[2] = immediate->u[2].Uint; + ctx.value[3] = immediate->u[3].Uint; break; case TGSI_TOKEN_TYPE_DECLARATION: r = tgsi_declaration(&ctx); @@ -399,7 +399,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s r = ctx.inst_info->process(&ctx); if (r) goto out_err; - r = r600_bc_add_literal(ctx.bc, value); + r = r600_bc_add_literal(ctx.bc, ctx.value); if (r) goto out_err; break; @@ -557,6 +557,9 @@ static int tgsi_slt(struct r600_shader_ctx *ctx) struct r600_bc_alu alu; int i, r; + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { -- cgit v1.2.3 From 6a20539580e1b7ae921d497fbf66a8fd545efea4 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Tue, 27 Jul 2010 15:33:45 -0700 Subject: r600g: Move declaration before code. Fixes SCons build. --- src/gallium/drivers/r600/r600_shader.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index dcedf56c11..17e9c14d6b 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -651,6 +651,9 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & (1 << 2)) { + int chan; + int sel; + /* dst.z = log(src.y) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED; @@ -665,8 +668,8 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (r) return r; - int chan = alu.dst.chan; - int sel = alu.dst.sel; + chan = alu.dst.chan; + sel = alu.dst.sel; /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); -- cgit v1.2.3 From 25472942c9640f6c0d252de2f013d04ac2355b1d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 27 Jul 2010 17:40:00 -0600 Subject: llvmpipe: pass face+slice to llvmpipe_unswizzle_cbuf_tile() Cube map faces and 3D texture slices are treated the same in llvmpipe textures. Need to pass the sum of these fields to llvmpipe_unswizzle_cbuf_tile() as we do elsewhere. Fixes piglit fbo-3d test (fd.o bug 29135). --- src/gallium/drivers/llvmpipe/lp_rast.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 654f4ea48e..ba7b48328b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -368,14 +368,15 @@ lp_rast_store_linear_color( struct lp_rasterizer_task *task, for (buf = 0; buf < rast->state.nr_cbufs; buf++) { struct pipe_surface *cbuf = scene->fb.cbufs[buf]; - const unsigned face = cbuf->face, level = cbuf->level; + const unsigned face_slice = cbuf->face + cbuf->zslice; + const unsigned level = cbuf->level; struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture); if (!task->color_tiles[buf]) continue; llvmpipe_unswizzle_cbuf_tile(lpt, - face, + face_slice, level, task->x, task->y, task->color_tiles[buf]); -- cgit v1.2.3 From 641c9adb09e8707f659d42be600d16902ebf8895 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Tue, 27 Jul 2010 20:15:17 -0400 Subject: r600g: texture support Add texture mapping support, redbook/texbind works if you comment out glClear and second checkboard. Need to fix : - texture overwritting - lod & mip/map handling - unormalized coordinate handling - texture view with first leve > 0 - and many other things Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_context.c | 2 +- src/gallium/drivers/r600/r600_context.h | 12 ++ src/gallium/drivers/r600/r600_draw.c | 11 ++ src/gallium/drivers/r600/r600_shader.c | 78 ++++++++- src/gallium/drivers/r600/r600_state.c | 277 +++++++++++++++++++++++++++++++- src/gallium/drivers/r600/r600_texture.c | 4 + src/gallium/drivers/r600/r600d.h | 235 +++++++++++++++++++++++++++ 7 files changed, 610 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index 05575b5767..3c5195f79e 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -55,7 +55,7 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, */ if (!dc) radeon_ctx_dump_bof(rctx->ctx, "gallium.bof"); -#if 0 +#if 1 radeon_ctx_submit(rctx->ctx); #endif rctx->ctx = radeon_ctx_decref(rctx->ctx); diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index 669aaec0b2..30f33f757e 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -23,6 +23,7 @@ #ifndef R600_CONTEXT_H #define R600_CONTEXT_H +#include #include #include #include @@ -45,6 +46,11 @@ struct r600_pipe_shader { struct radeon_state *state; }; +struct r600_texture_resource { + struct pipe_sampler_view view; + struct radeon_state *state; +}; + struct r600_context { struct pipe_context context; struct r600_screen *screen; @@ -55,6 +61,12 @@ struct r600_context { struct radeon_state *config; struct r600_pipe_shader *ps_shader; struct r600_pipe_shader *vs_shader; + unsigned nps_sampler; + struct radeon_state *ps_sampler[PIPE_MAX_ATTRIBS]; + unsigned nps_view; + unsigned nvs_view; + struct r600_texture_resource *ps_view[PIPE_MAX_ATTRIBS]; + struct r600_texture_resource *vs_view[PIPE_MAX_ATTRIBS]; unsigned flat_shade; unsigned nvertex_buffer; struct r600_vertex_elements_state *vertex_elements; diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c index 724fb6c988..e0d624889f 100644 --- a/src/gallium/drivers/r600/r600_draw.c +++ b/src/gallium/drivers/r600/r600_draw.c @@ -125,6 +125,17 @@ static int r600_draw_common(struct r600_draw *draw) if (r) return r; } + /* setup texture sampler & resource */ + for (i = 0 ; i < rctx->nps_sampler; i++) { + r = radeon_draw_set_new(rctx->draw, rctx->ps_sampler[i]); + if (r) + return r; + } + for (i = 0 ; i < rctx->nps_view; i++) { + r = radeon_draw_set_new(rctx->draw, rctx->ps_view[i]->state); + if (r) + return r; + } /* FIXME start need to change winsys */ draw->draw = radeon_state(rscreen->rw, R600_DRAW_TYPE, R600_DRAW); if (draw->draw == NULL) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 17e9c14d6b..9796112775 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -838,12 +838,82 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_tex tex; + struct r600_bc_alu alu; + unsigned src_gpr; + int r; + + src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; + + /* Add perspective divide */ + if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_TXP) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE; + alu.src[0].sel = src_gpr; + alu.src[0].chan = 3; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 3; + alu.last = 1; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 3; + alu.src[1].sel = src_gpr; + alu.src[1].chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 0; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 3; + alu.src[1].sel = src_gpr; + alu.src[1].chan = 1; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 1; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 3; + alu.src[1].sel = src_gpr; + alu.src[1].chan = 2; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 2; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.src[0].sel = 249; + alu.src[0].chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 3; + alu.last = 1; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + src_gpr = ctx->temp_reg; + } + /* TODO use temp if src_gpr is not a temporary reg (File != TEMPORARY) */ memset(&tex, 0, sizeof(struct r600_bc_tex)); tex.inst = ctx->inst_info->r600_opcode; tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; tex.sampler_id = tex.resource_id; - tex.src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; + tex.src_gpr = src_gpr; tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Src[0].Register.Index; tex.dst_sel_x = 0; tex.dst_sel_y = 1; @@ -853,6 +923,10 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.src_sel_y = 1; tex.src_sel_z = 2; tex.src_sel_w = 3; + tex.coord_type_x = 1; + tex.coord_type_y = 1; + tex.coord_type_z = 1; + tex.coord_type_w = 1; return r600_bc_add_tex(ctx->bc, &tex); } @@ -912,7 +986,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TEX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TEX, 0, 0x10, tgsi_tex}, {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_TXP, 0, 0x10, tgsi_tex}, {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 84a13e4ef7..6503c3740e 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -187,6 +187,8 @@ static void *r600_create_rs_state(struct pipe_context *ctx, struct radeon_state *rstate; rctx->flat_shade = state->flatshade; + rctx->flat_shade = 0; +R600_ERR("flat shade with texture broke tex coord interp\n"); rstate = radeon_state(rscreen->rw, R600_RASTERIZER_TYPE, R600_RASTERIZER); if (rstate == NULL) return NULL; @@ -224,43 +226,306 @@ static void r600_bind_rs_state(struct pipe_context *ctx, void *state) radeon_draw_set(rctx->draw, state); } +static inline unsigned r600_tex_wrap(unsigned wrap) +{ + switch (wrap) { + default: + case PIPE_TEX_WRAP_REPEAT: + return V_03C000_SQ_TEX_WRAP; + case PIPE_TEX_WRAP_CLAMP: + return V_03C000_SQ_TEX_CLAMP_LAST_TEXEL; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return V_03C000_SQ_TEX_CLAMP_HALF_BORDER; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return V_03C000_SQ_TEX_CLAMP_BORDER; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return V_03C000_SQ_TEX_MIRROR; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + return V_03C000_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return V_03C000_SQ_TEX_MIRROR_ONCE_HALF_BORDER; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return V_03C000_SQ_TEX_MIRROR_ONCE_BORDER; + } +} + +static inline unsigned r600_tex_filter(unsigned filter) +{ + switch (filter) { + default: + case PIPE_TEX_FILTER_NEAREST: + return V_03C000_SQ_TEX_XY_FILTER_POINT; + case PIPE_TEX_FILTER_LINEAR: + return V_03C000_SQ_TEX_XY_FILTER_BILINEAR; + } +} + +static inline unsigned r600_tex_mipfilter(unsigned filter) +{ + switch (filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + return V_03C000_SQ_TEX_Z_FILTER_POINT; + case PIPE_TEX_MIPFILTER_LINEAR: + return V_03C000_SQ_TEX_Z_FILTER_LINEAR; + default: + case PIPE_TEX_MIPFILTER_NONE: + return V_03C000_SQ_TEX_Z_FILTER_NONE; + } +} + +static inline unsigned r600_tex_compare(unsigned compare) +{ + switch (compare) { + default: + case PIPE_FUNC_NEVER: + return V_03C000_SQ_TEX_DEPTH_COMPARE_NEVER; + case PIPE_FUNC_LESS: + return V_03C000_SQ_TEX_DEPTH_COMPARE_LESS; + case PIPE_FUNC_EQUAL: + return V_03C000_SQ_TEX_DEPTH_COMPARE_EQUAL; + case PIPE_FUNC_LEQUAL: + return V_03C000_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; + case PIPE_FUNC_GREATER: + return V_03C000_SQ_TEX_DEPTH_COMPARE_GREATER; + case PIPE_FUNC_NOTEQUAL: + return V_03C000_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; + case PIPE_FUNC_GEQUAL: + return V_03C000_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; + case PIPE_FUNC_ALWAYS: + return V_03C000_SQ_TEX_DEPTH_COMPARE_ALWAYS; + } +} + static void *r600_create_sampler_state(struct pipe_context *ctx, const struct pipe_sampler_state *state) { - return NULL; + struct r600_screen *rscreen = r600_screen(ctx->screen); + struct radeon_state *rstate; + + rstate = radeon_state(rscreen->rw, R600_PS_SAMPLER_TYPE, R600_PS_SAMPLER); + if (rstate == NULL) + return NULL; + rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0] = + S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | + S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | + S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) | + S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter)) | + S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter)) | + S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | + S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)); + /* FIXME LOD it depends on texture base level ... */ + rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0] = + S_03C004_MIN_LOD(0) | + S_03C004_MAX_LOD(0) | + S_03C004_LOD_BIAS(0); + rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0] = S_03C008_TYPE(1); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } static void r600_bind_sampler_states(struct pipe_context *ctx, unsigned count, void **states) { + struct r600_context *rctx = r600_context(ctx); + unsigned i; + + /* FIXME split VS/PS/GS sampler */ + for (i = 0; i < count; i++) { + rctx->ps_sampler[i] = radeon_state_decref(rctx->ps_sampler[i]); + } + rctx->nps_sampler = count; + for (i = 0; i < count; i++) { + rctx->ps_sampler[i] = radeon_state_incref(states[i]); + rctx->ps_sampler[i]->id = R600_PS_SAMPLER + i; + } +} + +static inline unsigned r600_tex_swizzle(unsigned swizzle) +{ + switch (swizzle) { + case PIPE_SWIZZLE_RED: + return V_038010_SQ_SEL_X; + case PIPE_SWIZZLE_GREEN: + return V_038010_SQ_SEL_Y; + case PIPE_SWIZZLE_BLUE: + return V_038010_SQ_SEL_Z; + case PIPE_SWIZZLE_ALPHA: + return V_038010_SQ_SEL_W; + case PIPE_SWIZZLE_ZERO: + return V_038010_SQ_SEL_0; + default: + case PIPE_SWIZZLE_ONE: + return V_038010_SQ_SEL_1; + } +} + +static inline unsigned r600_format_type(unsigned format_type) +{ + switch (format_type) { + default: + case UTIL_FORMAT_TYPE_UNSIGNED: + return V_038010_SQ_FORMAT_COMP_UNSIGNED; + case UTIL_FORMAT_TYPE_SIGNED: + return V_038010_SQ_FORMAT_COMP_SIGNED; + case UTIL_FORMAT_TYPE_FIXED: + return V_038010_SQ_FORMAT_COMP_UNSIGNED_BIASED; + } +} + +static inline unsigned r600_tex_dim(unsigned dim) +{ + switch (dim) { + default: + case PIPE_TEXTURE_1D: + return V_038000_SQ_TEX_DIM_1D; + case PIPE_TEXTURE_2D: + return V_038000_SQ_TEX_DIM_2D; + case PIPE_TEXTURE_3D: + return V_038000_SQ_TEX_DIM_3D; + case PIPE_TEXTURE_CUBE: + return V_038000_SQ_TEX_DIM_CUBEMAP; + } } static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *ctx, struct pipe_resource *texture, - const struct pipe_sampler_view *templ) + const struct pipe_sampler_view *view) { - struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view); + struct r600_screen *rscreen = r600_screen(ctx->screen); + struct r600_texture_resource *rtexture; + const struct util_format_description *desc; + struct r600_texture *tmp; + struct r600_buffer *rbuffer; + unsigned format; - *view = *templ; - return view; + if (r600_conv_pipe_format(texture->format, &format)) + return NULL; + rtexture = CALLOC_STRUCT(r600_texture_resource); + if (rtexture == NULL) + return NULL; + desc = util_format_description(texture->format); + assert(desc == NULL); + rtexture->state = radeon_state(rscreen->rw, R600_PS_RESOURCE_TYPE, R600_PS_RESOURCE); + if (rtexture->state == NULL) { + FREE(rtexture); + return NULL; + } + rtexture->view = *view; + rtexture->view.reference.count = 1; + rtexture->view.texture = NULL; + pipe_resource_reference(&rtexture->view.texture, texture); + rtexture->view.context = ctx; + + tmp = (struct r600_texture*)texture; + rbuffer = (struct r600_buffer*)tmp->buffer; + rtexture->state->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + rtexture->state->bo[1] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + rtexture->state->nbo = 2; + rtexture->state->placement[0] = RADEON_GEM_DOMAIN_GTT; + rtexture->state->placement[1] = RADEON_GEM_DOMAIN_GTT; + rtexture->state->placement[2] = RADEON_GEM_DOMAIN_GTT; + rtexture->state->placement[3] = RADEON_GEM_DOMAIN_GTT; + + /* FIXME properly handle first level != 0 */ + rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = + S_038000_DIM(r600_tex_dim(texture->target)) | + S_038000_PITCH((tmp->pitch[0] / 8) - 1) | + S_038000_TEX_WIDTH(texture->width0 - 1); + rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = + S_038004_TEX_HEIGHT(texture->height0 - 1) | + S_038004_TEX_DEPTH(texture->depth0 - 1) | + S_038004_DATA_FORMAT(format); + rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = 0; + rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = tmp->offset[1] >> 8; + rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD4] = + S_038010_FORMAT_COMP_X(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) | + S_038010_FORMAT_COMP_Y(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) | + S_038010_FORMAT_COMP_Z(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) | + S_038010_FORMAT_COMP_W(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) | + S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) | + S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) | + S_038010_REQUEST_SIZE(1) | + S_038010_DST_SEL_X(r600_tex_swizzle(view->swizzle_r)) | + S_038010_DST_SEL_Y(r600_tex_swizzle(view->swizzle_g)) | + S_038010_DST_SEL_Z(r600_tex_swizzle(view->swizzle_b)) | + S_038010_DST_SEL_W(r600_tex_swizzle(view->swizzle_a)) | + S_038010_BASE_LEVEL(view->first_level); + rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD5] = + S_038014_LAST_LEVEL(view->last_level) | + S_038014_BASE_ARRAY(0) | + S_038014_LAST_ARRAY(0); + rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = + S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE); + return &rtexture->view; } static void r600_sampler_view_destroy(struct pipe_context *ctx, struct pipe_sampler_view *view) { - FREE(view); + struct r600_texture_resource *texture; + + if (view == NULL) + return; + texture = LIST_ENTRY(struct r600_texture_resource, view, view); + radeon_state_decref(texture->state); + FREE(texture); } static void r600_set_fragment_sampler_views(struct pipe_context *ctx, unsigned count, struct pipe_sampler_view **views) { + struct r600_texture_resource *rtexture; + struct r600_context *rctx = r600_context(ctx); + struct pipe_sampler_view *tmp; + unsigned i; + + if (views == NULL) + return; + + for (i = 0; i < rctx->nps_view; i++) { + tmp = &rctx->ps_view[i]->view; + pipe_sampler_view_reference(&tmp, NULL); + rctx->ps_view[i] = NULL; + } + rctx->nps_view = count; + for (i = 0; i < count; i++) { + rtexture = LIST_ENTRY(struct r600_texture_resource, views[i], view); + rctx->ps_view[i] = rtexture; + tmp = NULL; + pipe_sampler_view_reference(&tmp, views[i]); + rtexture->state->id = R600_PS_RESOURCE + i; + } } static void r600_set_vertex_sampler_views(struct pipe_context *ctx, unsigned count, struct pipe_sampler_view **views) { + struct r600_texture_resource *rtexture; + struct r600_context *rctx = r600_context(ctx); + struct pipe_sampler_view *tmp; + unsigned i; + + if (views == NULL) + return; + + for (i = 0; i < rctx->nvs_view; i++) { + tmp = &rctx->vs_view[i]->view; + pipe_sampler_view_reference(&tmp, NULL); + rctx->vs_view[i] = NULL; + } + rctx->nps_view = count; + for (i = 0; i < count; i++) { + rtexture = LIST_ENTRY(struct r600_texture_resource, views[i], view); + rctx->vs_view[i] = rtexture; + tmp = NULL; + pipe_sampler_view_reference(&tmp, views[i]); + rtexture->state->id = R600_VS_RESOURCE + i; + } } static void r600_set_scissor_state(struct pipe_context *ctx, diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 903cfad80a..1c219a5579 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -31,7 +31,9 @@ #include #include "state_tracker/drm_driver.h" #include "r600_screen.h" +#include "r600_context.h" #include "r600_texture.h" +#include "r600d.h" extern struct u_resource_vtbl r600_texture_vtbl; @@ -69,6 +71,8 @@ static void r600_setup_miptree(struct r600_screen *rscreen, struct r600_texture rtex->offset[i] = offset; rtex->layer_size[i] = layer_size; rtex->pitch[i] = stride / util_format_get_blocksize(ptex->format); + rtex->pitch[i] += R600_TEXEL_PITCH_ALIGNMENT_MASK; + rtex->pitch[i] &= ~R600_TEXEL_PITCH_ALIGNMENT_MASK; rtex->stride[i] = stride; offset += align(size, 32); } diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 44834984c6..593b95c9c7 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -26,6 +26,8 @@ #ifndef R600D_H #define R600D_H +#define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7 + #define PKT3_NOP 0x10 #define PKT3_INDIRECT_BUFFER_END 0x17 #define PKT3_SET_PREDICATION 0x20 @@ -574,6 +576,132 @@ #define S_0287F0_USE_OPAQUE(x) (((x) & 0x1) << 6) #define G_0287F0_USE_OPAQUE(x) (((x) >> 6) & 0x1) #define C_0287F0_USE_OPAQUE 0xFFFFFFBF +#define R_038000_SQ_TEX_RESOURCE_WORD0_0 0x038000 +#define S_038000_DIM(x) (((x) & 0x7) << 0) +#define G_038000_DIM(x) (((x) >> 0) & 0x7) +#define C_038000_DIM 0xFFFFFFF8 +#define V_038000_SQ_TEX_DIM_1D 0x00000000 +#define V_038000_SQ_TEX_DIM_2D 0x00000001 +#define V_038000_SQ_TEX_DIM_3D 0x00000002 +#define V_038000_SQ_TEX_DIM_CUBEMAP 0x00000003 +#define V_038000_SQ_TEX_DIM_1D_ARRAY 0x00000004 +#define V_038000_SQ_TEX_DIM_2D_ARRAY 0x00000005 +#define V_038000_SQ_TEX_DIM_2D_MSAA 0x00000006 +#define V_038000_SQ_TEX_DIM_2D_ARRAY_MSAA 0x00000007 +#define S_038000_TILE_MODE(x) (((x) & 0xF) << 3) +#define G_038000_TILE_MODE(x) (((x) >> 3) & 0xF) +#define C_038000_TILE_MODE 0xFFFFFF87 +#define S_038000_TILE_TYPE(x) (((x) & 0x1) << 7) +#define G_038000_TILE_TYPE(x) (((x) >> 7) & 0x1) +#define C_038000_TILE_TYPE 0xFFFFFF7F +#define S_038000_PITCH(x) (((x) & 0x7FF) << 8) +#define G_038000_PITCH(x) (((x) >> 8) & 0x7FF) +#define C_038000_PITCH 0xFFF800FF +#define S_038000_TEX_WIDTH(x) (((x) & 0x1FFF) << 19) +#define G_038000_TEX_WIDTH(x) (((x) >> 19) & 0x1FFF) +#define C_038000_TEX_WIDTH 0x0007FFFF +#define R_038004_SQ_TEX_RESOURCE_WORD1_0 0x038004 +#define S_038004_TEX_HEIGHT(x) (((x) & 0x1FFF) << 0) +#define G_038004_TEX_HEIGHT(x) (((x) >> 0) & 0x1FFF) +#define C_038004_TEX_HEIGHT 0xFFFFE000 +#define S_038004_TEX_DEPTH(x) (((x) & 0x1FFF) << 13) +#define G_038004_TEX_DEPTH(x) (((x) >> 13) & 0x1FFF) +#define C_038004_TEX_DEPTH 0xFC001FFF +#define S_038004_DATA_FORMAT(x) (((x) & 0x3F) << 26) +#define G_038004_DATA_FORMAT(x) (((x) >> 26) & 0x3F) +#define C_038004_DATA_FORMAT 0x03FFFFFF +#define R_038008_SQ_TEX_RESOURCE_WORD2_0 0x038008 +#define S_038008_BASE_ADDRESS(x) (((x) & 0xFFFFFFFF) << 0) +#define G_038008_BASE_ADDRESS(x) (((x) >> 0) & 0xFFFFFFFF) +#define C_038008_BASE_ADDRESS 0x00000000 +#define R_03800C_SQ_TEX_RESOURCE_WORD3_0 0x03800C +#define S_03800C_MIP_ADDRESS(x) (((x) & 0xFFFFFFFF) << 0) +#define G_03800C_MIP_ADDRESS(x) (((x) >> 0) & 0xFFFFFFFF) +#define C_03800C_MIP_ADDRESS 0x00000000 +#define R_038010_SQ_TEX_RESOURCE_WORD4_0 0x038010 +#define S_038010_FORMAT_COMP_X(x) (((x) & 0x3) << 0) +#define G_038010_FORMAT_COMP_X(x) (((x) >> 0) & 0x3) +#define C_038010_FORMAT_COMP_X 0xFFFFFFFC +#define V_038010_SQ_FORMAT_COMP_UNSIGNED 0x00000000 +#define V_038010_SQ_FORMAT_COMP_SIGNED 0x00000001 +#define V_038010_SQ_FORMAT_COMP_UNSIGNED_BIASED 0x00000002 +#define S_038010_FORMAT_COMP_Y(x) (((x) & 0x3) << 2) +#define G_038010_FORMAT_COMP_Y(x) (((x) >> 2) & 0x3) +#define C_038010_FORMAT_COMP_Y 0xFFFFFFF3 +#define S_038010_FORMAT_COMP_Z(x) (((x) & 0x3) << 4) +#define G_038010_FORMAT_COMP_Z(x) (((x) >> 4) & 0x3) +#define C_038010_FORMAT_COMP_Z 0xFFFFFFCF +#define S_038010_FORMAT_COMP_W(x) (((x) & 0x3) << 6) +#define G_038010_FORMAT_COMP_W(x) (((x) >> 6) & 0x3) +#define C_038010_FORMAT_COMP_W 0xFFFFFF3F +#define S_038010_NUM_FORMAT_ALL(x) (((x) & 0x3) << 8) +#define G_038010_NUM_FORMAT_ALL(x) (((x) >> 8) & 0x3) +#define C_038010_NUM_FORMAT_ALL 0xFFFFFCFF +#define V_038010_SQ_NUM_FORMAT_NORM 0x00000000 +#define V_038010_SQ_NUM_FORMAT_INT 0x00000001 +#define V_038010_SQ_NUM_FORMAT_SCALED 0x00000002 +#define S_038010_SRF_MODE_ALL(x) (((x) & 0x1) << 10) +#define G_038010_SRF_MODE_ALL(x) (((x) >> 10) & 0x1) +#define C_038010_SRF_MODE_ALL 0xFFFFFBFF +#define V_038010_SFR_MODE_ZERO_CLAMP_MINUS_ONE 0x00000000 +#define V_038010_SFR_MODE_NO_ZERO 0x00000001 +#define S_038010_FORCE_DEGAMMA(x) (((x) & 0x1) << 11) +#define G_038010_FORCE_DEGAMMA(x) (((x) >> 11) & 0x1) +#define C_038010_FORCE_DEGAMMA 0xFFFFF7FF +#define S_038010_ENDIAN_SWAP(x) (((x) & 0x3) << 12) +#define G_038010_ENDIAN_SWAP(x) (((x) >> 12) & 0x3) +#define C_038010_ENDIAN_SWAP 0xFFFFCFFF +#define S_038010_REQUEST_SIZE(x) (((x) & 0x3) << 14) +#define G_038010_REQUEST_SIZE(x) (((x) >> 14) & 0x3) +#define C_038010_REQUEST_SIZE 0xFFFF3FFF +#define S_038010_DST_SEL_X(x) (((x) & 0x7) << 16) +#define G_038010_DST_SEL_X(x) (((x) >> 16) & 0x7) +#define C_038010_DST_SEL_X 0xFFF8FFFF +#define V_038010_SQ_SEL_X 0x00000000 +#define V_038010_SQ_SEL_Y 0x00000001 +#define V_038010_SQ_SEL_Z 0x00000002 +#define V_038010_SQ_SEL_W 0x00000003 +#define V_038010_SQ_SEL_0 0x00000004 +#define V_038010_SQ_SEL_1 0x00000005 +#define S_038010_DST_SEL_Y(x) (((x) & 0x7) << 19) +#define G_038010_DST_SEL_Y(x) (((x) >> 19) & 0x7) +#define C_038010_DST_SEL_Y 0xFFC7FFFF +#define S_038010_DST_SEL_Z(x) (((x) & 0x7) << 22) +#define G_038010_DST_SEL_Z(x) (((x) >> 22) & 0x7) +#define C_038010_DST_SEL_Z 0xFE3FFFFF +#define S_038010_DST_SEL_W(x) (((x) & 0x7) << 25) +#define G_038010_DST_SEL_W(x) (((x) >> 25) & 0x7) +#define C_038010_DST_SEL_W 0xF1FFFFFF +#define S_038010_BASE_LEVEL(x) (((x) & 0xF) << 28) +#define G_038010_BASE_LEVEL(x) (((x) >> 28) & 0xF) +#define C_038010_BASE_LEVEL 0x0FFFFFFF +#define R_038014_SQ_TEX_RESOURCE_WORD5_0 0x038014 +#define S_038014_LAST_LEVEL(x) (((x) & 0xF) << 0) +#define G_038014_LAST_LEVEL(x) (((x) >> 0) & 0xF) +#define C_038014_LAST_LEVEL 0xFFFFFFF0 +#define S_038014_BASE_ARRAY(x) (((x) & 0x1FFF) << 4) +#define G_038014_BASE_ARRAY(x) (((x) >> 4) & 0x1FFF) +#define C_038014_BASE_ARRAY 0xFFFE000F +#define S_038014_LAST_ARRAY(x) (((x) & 0x1FFF) << 17) +#define G_038014_LAST_ARRAY(x) (((x) >> 17) & 0x1FFF) +#define C_038014_LAST_ARRAY 0xC001FFFF +#define R_038018_SQ_TEX_RESOURCE_WORD6_0 0x038018 +#define S_038018_MPEG_CLAMP(x) (((x) & 0x3) << 0) +#define G_038018_MPEG_CLAMP(x) (((x) >> 0) & 0x3) +#define C_038018_MPEG_CLAMP 0xFFFFFFFC +#define S_038018_PERF_MODULATION(x) (((x) & 0x7) << 5) +#define G_038018_PERF_MODULATION(x) (((x) >> 5) & 0x7) +#define C_038018_PERF_MODULATION 0xFFFFFF1F +#define S_038018_INTERLACED(x) (((x) & 0x1) << 8) +#define G_038018_INTERLACED(x) (((x) >> 8) & 0x1) +#define C_038018_INTERLACED 0xFFFFFEFF +#define S_038018_TYPE(x) (((x) & 0x3) << 30) +#define G_038018_TYPE(x) (((x) >> 30) & 0x3) +#define C_038018_TYPE 0x3FFFFFFF +#define V_038010_SQ_TEX_VTX_INVALID_TEXTURE 0x00000000 +#define V_038010_SQ_TEX_VTX_INVALID_BUFFER 0x00000001 +#define V_038010_SQ_TEX_VTX_VALID_TEXTURE 0x00000002 +#define V_038010_SQ_TEX_VTX_VALID_BUFFER 0x00000003 #define R_038008_SQ_VTX_CONSTANT_WORD2_0 0x038008 #define S_038008_BASE_ADDRESS_HI(x) (((x) & 0xFF) << 0) #define G_038008_BASE_ADDRESS_HI(x) (((x) >> 0) & 0xFF) @@ -633,6 +761,113 @@ #define S_038008_ENDIAN_SWAP(x) (((x) & 0x3) << 30) #define G_038008_ENDIAN_SWAP(x) (((x) >> 30) & 0x3) #define C_038008_ENDIAN_SWAP 0x3FFFFFFF +#define R_03C000_SQ_TEX_SAMPLER_WORD0_0 0x03C000 +#define S_03C000_CLAMP_X(x) (((x) & 0x7) << 0) +#define G_03C000_CLAMP_X(x) (((x) >> 0) & 0x7) +#define C_03C000_CLAMP_X 0xFFFFFFF8 +#define V_03C000_SQ_TEX_WRAP 0x00000000 +#define V_03C000_SQ_TEX_MIRROR 0x00000001 +#define V_03C000_SQ_TEX_CLAMP_LAST_TEXEL 0x00000002 +#define V_03C000_SQ_TEX_MIRROR_ONCE_LAST_TEXEL 0x00000003 +#define V_03C000_SQ_TEX_CLAMP_HALF_BORDER 0x00000004 +#define V_03C000_SQ_TEX_MIRROR_ONCE_HALF_BORDER 0x00000005 +#define V_03C000_SQ_TEX_CLAMP_BORDER 0x00000006 +#define V_03C000_SQ_TEX_MIRROR_ONCE_BORDER 0x00000007 +#define S_03C000_CLAMP_Y(x) (((x) & 0x7) << 3) +#define G_03C000_CLAMP_Y(x) (((x) >> 3) & 0x7) +#define C_03C000_CLAMP_Y 0xFFFFFFC7 +#define S_03C000_CLAMP_Z(x) (((x) & 0x7) << 6) +#define G_03C000_CLAMP_Z(x) (((x) >> 6) & 0x7) +#define C_03C000_CLAMP_Z 0xFFFFFE3F +#define S_03C000_XY_MAG_FILTER(x) (((x) & 0x7) << 9) +#define G_03C000_XY_MAG_FILTER(x) (((x) >> 9) & 0x7) +#define C_03C000_XY_MAG_FILTER 0xFFFFF1FF +#define V_03C000_SQ_TEX_XY_FILTER_POINT 0x00000000 +#define V_03C000_SQ_TEX_XY_FILTER_BILINEAR 0x00000001 +#define V_03C000_SQ_TEX_XY_FILTER_BICUBIC 0x00000002 +#define S_03C000_XY_MIN_FILTER(x) (((x) & 0x7) << 12) +#define G_03C000_XY_MIN_FILTER(x) (((x) >> 12) & 0x7) +#define C_03C000_XY_MIN_FILTER 0xFFFF8FFF +#define S_03C000_Z_FILTER(x) (((x) & 0x3) << 15) +#define G_03C000_Z_FILTER(x) (((x) >> 15) & 0x3) +#define C_03C000_Z_FILTER 0xFFFE7FFF +#define V_03C000_SQ_TEX_Z_FILTER_NONE 0x00000000 +#define V_03C000_SQ_TEX_Z_FILTER_POINT 0x00000001 +#define V_03C000_SQ_TEX_Z_FILTER_LINEAR 0x00000002 +#define S_03C000_MIP_FILTER(x) (((x) & 0x3) << 17) +#define G_03C000_MIP_FILTER(x) (((x) >> 17) & 0x3) +#define C_03C000_MIP_FILTER 0xFFF9FFFF +#define S_03C000_BORDER_COLOR_TYPE(x) (((x) & 0x3) << 22) +#define G_03C000_BORDER_COLOR_TYPE(x) (((x) >> 22) & 0x3) +#define C_03C000_BORDER_COLOR_TYPE 0xFF3FFFFF +#define V_03C000_SQ_TEX_BORDER_COLOR_TRANS_BLACK 0x00000000 +#define V_03C000_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK 0x00000001 +#define V_03C000_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE 0x00000002 +#define V_03C000_SQ_TEX_BORDER_COLOR_REGISTER 0x00000003 +#define S_03C000_POINT_SAMPLING_CLAMP(x) (((x) & 0x1) << 24) +#define G_03C000_POINT_SAMPLING_CLAMP(x) (((x) >> 24) & 0x1) +#define C_03C000_POINT_SAMPLING_CLAMP 0xFEFFFFFF +#define S_03C000_TEX_ARRAY_OVERRIDE(x) (((x) & 0x1) << 25) +#define G_03C000_TEX_ARRAY_OVERRIDE(x) (((x) >> 25) & 0x1) +#define C_03C000_TEX_ARRAY_OVERRIDE 0xFDFFFFFF +#define S_03C000_DEPTH_COMPARE_FUNCTION(x) (((x) & 0x7) << 26) +#define G_03C000_DEPTH_COMPARE_FUNCTION(x) (((x) >> 26) & 0x7) +#define C_03C000_DEPTH_COMPARE_FUNCTION 0xE3FFFFFF +#define V_03C000_SQ_TEX_DEPTH_COMPARE_NEVER 0x00000000 +#define V_03C000_SQ_TEX_DEPTH_COMPARE_LESS 0x00000001 +#define V_03C000_SQ_TEX_DEPTH_COMPARE_EQUAL 0x00000002 +#define V_03C000_SQ_TEX_DEPTH_COMPARE_LESSEQUAL 0x00000003 +#define V_03C000_SQ_TEX_DEPTH_COMPARE_GREATER 0x00000004 +#define V_03C000_SQ_TEX_DEPTH_COMPARE_NOTEQUAL 0x00000005 +#define V_03C000_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL 0x00000006 +#define V_03C000_SQ_TEX_DEPTH_COMPARE_ALWAYS 0x00000007 +#define S_03C000_CHROMA_KEY(x) (((x) & 0x3) << 29) +#define G_03C000_CHROMA_KEY(x) (((x) >> 29) & 0x3) +#define C_03C000_CHROMA_KEY 0x9FFFFFFF +#define V_03C000_SQ_TEX_CHROMA_KEY_DISABLE 0x00000000 +#define V_03C000_SQ_TEX_CHROMA_KEY_KILL 0x00000001 +#define V_03C000_SQ_TEX_CHROMA_KEY_BLEND 0x00000002 +#define S_03C000_LOD_USES_MINOR_AXIS(x) (((x) & 0x1) << 31) +#define G_03C000_LOD_USES_MINOR_AXIS(x) (((x) >> 31) & 0x1) +#define C_03C000_LOD_USES_MINOR_AXIS 0x7FFFFFFF +#define R_03C004_SQ_TEX_SAMPLER_WORD1_0 0x03C004 +#define S_03C004_MIN_LOD(x) (((x) & 0x3FF) << 0) +#define G_03C004_MIN_LOD(x) (((x) >> 0) & 0x3FF) +#define C_03C004_MIN_LOD 0xFFFFFC00 +#define S_03C004_MAX_LOD(x) (((x) & 0x3FF) << 10) +#define G_03C004_MAX_LOD(x) (((x) >> 10) & 0x3FF) +#define C_03C004_MAX_LOD 0xFFF003FF +#define S_03C004_LOD_BIAS(x) (((x) & 0xFFF) << 20) +#define G_03C004_LOD_BIAS(x) (((x) >> 20) & 0xFFF) +#define C_03C004_LOD_BIAS 0x000FFFFF +#define R_03C008_SQ_TEX_SAMPLER_WORD2_0 0x03C008 +#define S_03C008_LOD_BIAS_SEC(x) (((x) & 0xFFF) << 0) +#define G_03C008_LOD_BIAS_SEC(x) (((x) >> 0) & 0xFFF) +#define C_03C008_LOD_BIAS_SEC 0xFFFFF000 +#define S_03C008_MC_COORD_TRUNCATE(x) (((x) & 0x1) << 12) +#define G_03C008_MC_COORD_TRUNCATE(x) (((x) >> 12) & 0x1) +#define C_03C008_MC_COORD_TRUNCATE 0xFFFFEFFF +#define S_03C008_FORCE_DEGAMMA(x) (((x) & 0x1) << 13) +#define G_03C008_FORCE_DEGAMMA(x) (((x) >> 13) & 0x1) +#define C_03C008_FORCE_DEGAMMA 0xFFFFDFFF +#define S_03C008_HIGH_PRECISION_FILTER(x) (((x) & 0x1) << 14) +#define G_03C008_HIGH_PRECISION_FILTER(x) (((x) >> 14) & 0x1) +#define C_03C008_HIGH_PRECISION_FILTER 0xFFFFBFFF +#define S_03C008_PERF_MIP(x) (((x) & 0x7) << 15) +#define G_03C008_PERF_MIP(x) (((x) >> 15) & 0x7) +#define C_03C008_PERF_MIP 0xFFFC7FFF +#define S_03C008_PERF_Z(x) (((x) & 0x3) << 18) +#define G_03C008_PERF_Z(x) (((x) >> 18) & 0x3) +#define C_03C008_PERF_Z 0xFFF3FFFF +#define S_03C008_FETCH_4(x) (((x) & 0x1) << 26) +#define G_03C008_FETCH_4(x) (((x) >> 26) & 0x1) +#define C_03C008_FETCH_4 0xFBFFFFFF +#define S_03C008_SAMPLE_IS_PCF(x) (((x) & 0x1) << 27) +#define G_03C008_SAMPLE_IS_PCF(x) (((x) >> 27) & 0x1) +#define C_03C008_SAMPLE_IS_PCF 0xF7FFFFFF +#define S_03C008_TYPE(x) (((x) & 0x1) << 31) +#define G_03C008_TYPE(x) (((x) >> 31) & 0x1) +#define C_03C008_TYPE 0x7FFFFFFF #define R_008958_VGT_PRIMITIVE_TYPE 0x008958 #define S_008958_PRIM_TYPE(x) (((x) & 0x3F) << 0) #define G_008958_PRIM_TYPE(x) (((x) >> 0) & 0x3F) -- cgit v1.2.3 From f514ad0a9b9bebde0808eadb1e63ea548260be1e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 28 Jul 2010 11:51:16 +1000 Subject: r600g: use gallium util for float->ui conversion --- src/gallium/drivers/r600/r600_screen.h | 13 ------------- src/gallium/drivers/r600/r600_state.c | 12 ++++++------ 2 files changed, 6 insertions(+), 19 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_screen.h b/src/gallium/drivers/r600/r600_screen.h index 0a0286d96b..7a373cd0ef 100644 --- a/src/gallium/drivers/r600/r600_screen.h +++ b/src/gallium/drivers/r600/r600_screen.h @@ -88,17 +88,4 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx, int r600_conv_pipe_format(unsigned pformat, unsigned *format); int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); -union r600_float_to_u32_u { - u32 u; - float f; -}; - -static inline u32 r600_float_to_u32(float f) -{ - union r600_float_to_u32_u c; - - c.f = f; - return c.u; -} - #endif diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 6503c3740e..367a1f9991 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -579,12 +579,12 @@ static void r600_set_viewport_state(struct pipe_context *ctx, return; rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMIN_0] = 0x00000000; rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; - rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = r600_float_to_u32(state->scale[0]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_YSCALE_0] = r600_float_to_u32(state->scale[1]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = r600_float_to_u32(state->scale[2]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = r600_float_to_u32(state->translate[0]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = r600_float_to_u32(state->translate[1]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = r600_float_to_u32(state->translate[2]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui(state->scale[0]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui(state->scale[1]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = fui(state->scale[2]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui(state->translate[0]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui(state->translate[1]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = fui(state->translate[2]); rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; if (radeon_state_pm4(rstate)) { radeon_state_decref(rstate); -- cgit v1.2.3 From 2ab24a6faedb0f9b93055cbf3d52be1120353ee1 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 28 Jul 2010 15:26:14 +1000 Subject: r600g: fix up segfault with variation between views and count. For some reason gallium hands us something with lots of empty views, and we are expected to deal with it, just do what r300g does for this bit. --- src/gallium/drivers/r600/r600_state.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 367a1f9991..2fdcdea14e 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -481,18 +481,26 @@ static void r600_set_fragment_sampler_views(struct pipe_context *ctx, struct r600_texture_resource *rtexture; struct r600_context *rctx = r600_context(ctx); struct pipe_sampler_view *tmp; - unsigned i; + unsigned i, real_num_views = 0; if (views == NULL) return; + for (i = 0; i < count; i++) { + if (views[i]) + real_num_views++; + } + for (i = 0; i < rctx->nps_view; i++) { tmp = &rctx->ps_view[i]->view; pipe_sampler_view_reference(&tmp, NULL); rctx->ps_view[i] = NULL; } - rctx->nps_view = count; + rctx->nps_view = real_num_views; for (i = 0; i < count; i++) { + + if (!views[i]) + continue; rtexture = LIST_ENTRY(struct r600_texture_resource, views[i], view); rctx->ps_view[i] = rtexture; tmp = NULL; @@ -508,18 +516,24 @@ static void r600_set_vertex_sampler_views(struct pipe_context *ctx, struct r600_texture_resource *rtexture; struct r600_context *rctx = r600_context(ctx); struct pipe_sampler_view *tmp; - unsigned i; + unsigned i, real_num_views = 0; if (views == NULL) return; + for (i = 0; i < count; i++) { + if (views[i]) + real_num_views++; + } for (i = 0; i < rctx->nvs_view; i++) { tmp = &rctx->vs_view[i]->view; pipe_sampler_view_reference(&tmp, NULL); rctx->vs_view[i] = NULL; } - rctx->nps_view = count; + rctx->nvs_view = real_num_views; for (i = 0; i < count; i++) { + if (!views[i]) + continue; rtexture = LIST_ENTRY(struct r600_texture_resource, views[i], view); rctx->vs_view[i] = rtexture; tmp = NULL; -- cgit v1.2.3 From 9961a0b92de7afed2afec62dadad08d76d1d3374 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 28 Jul 2010 09:11:14 -0400 Subject: r600g: actualy fix the literal emission Previous patch added literal emission to wrong place, we want to emit literal before emitting a new alu group. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_shader.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 9796112775..a2d641dced 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -557,9 +557,6 @@ static int tgsi_slt(struct r600_shader_ctx *ctx) struct r600_bc_alu alu; int i, r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { @@ -739,6 +736,9 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru struct r600_bc_alu alu; int i, r; + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { -- cgit v1.2.3 From b346c4205dc19c9ffbff48ed8bd89687772a96f8 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 28 Jul 2010 10:35:32 -0400 Subject: r600g: add lrp instruction support Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_shader.c | 83 ++++++++++++++++++++++++++++++++-- 1 file changed, 80 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index a2d641dced..3909c704e7 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -587,10 +587,8 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; - int r; - if (inst->Dst[0].Register.WriteMask & (1 << 0)) { /* dst.x, <- 1.0 */ @@ -930,6 +928,85 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) return r600_bc_add_tex(ctx->bc, &tex); } +static int tgsi_lrp(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + unsigned i; + int r; + + /* 1 - src0 */ + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD; + alu.src[0].sel = 249; + alu.src[0].chan = 0; + r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]); + if (r) + return r; + alu.src[1].neg = 1; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + if (i == 3) { + alu.last = 1; + } + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; + + /* (1 - src0) * src2 */ + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = i; + r = tgsi_src(ctx, &inst->Src[2], i, &alu.src[1]); + if (r) + return r; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + if (i == 3) { + alu.last = 1; + } + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; + + /* src0 * src1 + (1 - src0) * src2 */ + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD; + alu.is_op3 = 1; + r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[0]); + if (r) + return r; + r = tgsi_src(ctx, &inst->Src[1], i, &alu.src[1]); + if (r) + return r; + alu.src[2].sel = ctx->temp_reg; + alu.src[2].chan = i; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + if (i == 3) { + alu.last = 1; + } + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return tgsi_helper_copy(ctx, inst); +} + static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, @@ -949,7 +1026,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, - {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* gap */ {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, -- cgit v1.2.3 From 742ee7935da60dda974795243d2e0fcf31accb59 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 28 Jul 2010 12:18:19 -0400 Subject: r600g: cleanup resource buffer/texture mess Use a common function, fix the mess it was before. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_buffer.c | 59 +++++------ src/gallium/drivers/r600/r600_context.c | 2 +- src/gallium/drivers/r600/r600_draw.c | 5 +- src/gallium/drivers/r600/r600_resource.c | 54 +++++----- src/gallium/drivers/r600/r600_resource.h | 35 ++++++- src/gallium/drivers/r600/r600_screen.c | 69 +------------ src/gallium/drivers/r600/r600_screen.h | 8 -- src/gallium/drivers/r600/r600_state.c | 28 +++--- src/gallium/drivers/r600/r600_texture.c | 163 +++++++++++++++++++------------ src/gallium/drivers/r600/r600_texture.h | 53 ---------- 10 files changed, 217 insertions(+), 259 deletions(-) delete mode 100644 src/gallium/drivers/r600/r600_texture.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index bc6e336ba7..167d117520 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -32,10 +32,11 @@ #include "state_tracker/drm_driver.h" #include "r600_screen.h" #include "r600_context.h" +#include "r600_resource.h" extern struct u_resource_vtbl r600_buffer_vtbl; -static u32 r600_domain_from_usage(unsigned usage) +u32 r600_domain_from_usage(unsigned usage) { u32 domain = RADEON_GEM_DOMAIN_GTT; @@ -63,47 +64,47 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ) { struct r600_screen *rscreen = r600_screen(screen); - struct r600_buffer *rbuffer; + struct r600_resource *rbuffer; struct radeon_bo *bo; struct pb_desc desc; /* XXX We probably want a different alignment for buffers and textures. */ unsigned alignment = 4096; - rbuffer = CALLOC_STRUCT(r600_buffer); + rbuffer = CALLOC_STRUCT(r600_resource); if (rbuffer == NULL) return NULL; - rbuffer->b.b = *templ; - pipe_reference_init(&rbuffer->b.b.reference, 1); - rbuffer->b.b.screen = screen; - rbuffer->b.vtbl = &r600_buffer_vtbl; + rbuffer->base.b = *templ; + pipe_reference_init(&rbuffer->base.b.reference, 1); + rbuffer->base.b.screen = screen; + rbuffer->base.vtbl = &r600_buffer_vtbl; - if (rbuffer->b.b.bind & PIPE_BIND_CONSTANT_BUFFER) { + if (rbuffer->base.b.bind & PIPE_BIND_CONSTANT_BUFFER) { desc.alignment = alignment; - desc.usage = rbuffer->b.b.bind; - rbuffer->pb = pb_malloc_buffer_create(rbuffer->b.b.width0, + desc.usage = rbuffer->base.b.bind; + rbuffer->pb = pb_malloc_buffer_create(rbuffer->base.b.width0, &desc); if (rbuffer->pb == NULL) { free(rbuffer); return NULL; } - return &rbuffer->b.b; + return &rbuffer->base.b; } - rbuffer->domain = r600_domain_from_usage(rbuffer->b.b.bind); - bo = radeon_bo(rscreen->rw, 0, rbuffer->b.b.width0, alignment, NULL); + rbuffer->domain = r600_domain_from_usage(rbuffer->base.b.bind); + bo = radeon_bo(rscreen->rw, 0, rbuffer->base.b.width0, alignment, NULL); if (bo == NULL) { FREE(rbuffer); return NULL; } rbuffer->bo = bo; - return &rbuffer->b.b; + return &rbuffer->base.b; } struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, void *ptr, unsigned bytes, unsigned bind) { - struct r600_buffer *rbuffer; + struct r600_resource *rbuffer; struct r600_screen *rscreen = r600_screen(screen); struct pipe_resource templ; @@ -116,20 +117,20 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, templ.height0 = 1; templ.depth0 = 1; - rbuffer = (struct r600_buffer*)r600_buffer_create(screen, &templ); + rbuffer = (struct r600_resource*)r600_buffer_create(screen, &templ); if (rbuffer == NULL) { return NULL; } radeon_bo_map(rscreen->rw, rbuffer->bo); memcpy(rbuffer->bo->data, ptr, bytes); radeon_bo_unmap(rscreen->rw, rbuffer->bo); - return &rbuffer->b.b; + return &rbuffer->base.b; } static void r600_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *buf) { - struct r600_buffer *rbuffer = (struct r600_buffer*)buf; + struct r600_resource *rbuffer = (struct r600_resource*)buf; struct r600_screen *rscreen = r600_screen(screen); if (rbuffer->pb) { @@ -146,7 +147,7 @@ static void r600_buffer_destroy(struct pipe_screen *screen, static void *r600_buffer_transfer_map(struct pipe_context *pipe, struct pipe_transfer *transfer) { - struct r600_buffer *rbuffer = (struct r600_buffer*)transfer->resource; + struct r600_resource *rbuffer = (struct r600_resource*)transfer->resource; struct r600_screen *rscreen = r600_screen(pipe->screen); int write = 0; @@ -166,9 +167,9 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe, } static void r600_buffer_transfer_unmap(struct pipe_context *pipe, - struct pipe_transfer *transfer) + struct pipe_transfer *transfer) { - struct r600_buffer *rbuffer = (struct r600_buffer*)transfer->resource; + struct r600_resource *rbuffer = (struct r600_resource*)transfer->resource; struct r600_screen *rscreen = r600_screen(pipe->screen); if (rbuffer->pb) { @@ -188,7 +189,7 @@ unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, struct pipe_resource *buf, unsigned face, unsigned level) { - /* XXX */ + /* FIXME */ return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; } @@ -196,7 +197,7 @@ struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle) { struct radeon *rw = (struct radeon*)screen->winsys; - struct r600_buffer *rbuffer; + struct r600_resource *rbuffer; struct radeon_bo *bo = NULL; bo = radeon_bo(rw, whandle->handle, 0, 0, NULL); @@ -204,18 +205,18 @@ struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, return NULL; } - rbuffer = CALLOC_STRUCT(r600_buffer); + rbuffer = CALLOC_STRUCT(r600_resource); if (rbuffer == NULL) { radeon_bo_decref(rw, bo); return NULL; } - pipe_reference_init(&rbuffer->b.b.reference, 1); - rbuffer->b.b.target = PIPE_BUFFER; - rbuffer->b.b.screen = screen; - rbuffer->b.vtbl = &r600_buffer_vtbl; + pipe_reference_init(&rbuffer->base.b.reference, 1); + rbuffer->base.b.target = PIPE_BUFFER; + rbuffer->base.b.screen = screen; + rbuffer->base.vtbl = &r600_buffer_vtbl; rbuffer->bo = bo; - return &rbuffer->b.b; + return &rbuffer->base.b; } struct u_resource_vtbl r600_buffer_vtbl = diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index 3c5195f79e..f2875f4380 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -29,9 +29,9 @@ #include #include #include -#include "r600_resource.h" #include "r600_screen.h" #include "r600_context.h" +#include "r600_resource.h" #include "r600d.h" static void r600_destroy_context(struct pipe_context *context) diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c index e0d624889f..8e9d11b855 100644 --- a/src/gallium/drivers/r600/r600_draw.c +++ b/src/gallium/drivers/r600/r600_draw.c @@ -33,6 +33,7 @@ #include #include "r600_screen.h" #include "r600_context.h" +#include "r600_resource.h" #include "r600d.h" struct r600_draw { @@ -51,7 +52,7 @@ static int r600_draw_common(struct r600_draw *draw) struct r600_context *rctx = r600_context(draw->ctx); struct r600_screen *rscreen = rctx->screen; struct radeon_state *vs_resource; - struct r600_buffer *rbuffer; + struct r600_resource *rbuffer; unsigned i, j, offset, format, prim; u32 vgt_dma_index_type, vgt_draw_initiator; int r; @@ -101,7 +102,7 @@ static int r600_draw_common(struct r600_draw *draw) for (i = 0 ; i < rctx->vertex_elements->count; i++) { j = rctx->vertex_elements->elements[i].vertex_buffer_index; - rbuffer = (struct r600_buffer*)rctx->vertex_buffer[j].buffer; + rbuffer = (struct r600_resource*)rctx->vertex_buffer[j].buffer; offset = rctx->vertex_elements->elements[i].src_offset + rctx->vertex_buffer[j].buffer_offset; r = r600_conv_pipe_format(rctx->vertex_elements->elements[i].src_format, &format); if (r) diff --git a/src/gallium/drivers/r600/r600_resource.c b/src/gallium/drivers/r600/r600_resource.c index d9aa1df04f..292c5d294d 100644 --- a/src/gallium/drivers/r600/r600_resource.c +++ b/src/gallium/drivers/r600/r600_resource.c @@ -26,43 +26,43 @@ #include "r600_screen.h" #include "r600_texture.h" -static struct pipe_resource * -r600_resource_create(struct pipe_screen *screen, - const struct pipe_resource *templ) +static struct pipe_resource *r600_resource_create(struct pipe_screen *screen, + const struct pipe_resource *templ) { - if (templ->target == PIPE_BUFFER) - return r600_buffer_create(screen, templ); - else - return r600_texture_create(screen, templ); + if (templ->target == PIPE_BUFFER) { + return r600_buffer_create(screen, templ); + } else { + return r600_texture_create(screen, templ); + } } -static struct pipe_resource * -r600_resource_from_handle(struct pipe_screen * screen, - const struct pipe_resource *templ, - struct winsys_handle *whandle) +static struct pipe_resource *r600_resource_from_handle(struct pipe_screen * screen, + const struct pipe_resource *templ, + struct winsys_handle *whandle) { - if (templ->target == PIPE_BUFFER) - return NULL; - else - return r600_texture_from_handle(screen, templ, whandle); + if (templ->target == PIPE_BUFFER) { + return NULL; + } else { + return r600_texture_from_handle(screen, templ, whandle); + } } void r600_init_context_resource_functions(struct r600_context *r600) { - r600->context.get_transfer = u_get_transfer_vtbl; - r600->context.transfer_map = u_transfer_map_vtbl; - r600->context.transfer_flush_region = u_transfer_flush_region_vtbl; - r600->context.transfer_unmap = u_transfer_unmap_vtbl; - r600->context.transfer_destroy = u_transfer_destroy_vtbl; - r600->context.transfer_inline_write = u_transfer_inline_write_vtbl; - r600->context.is_resource_referenced = u_is_resource_referenced_vtbl; + r600->context.get_transfer = u_get_transfer_vtbl; + r600->context.transfer_map = u_transfer_map_vtbl; + r600->context.transfer_flush_region = u_transfer_flush_region_vtbl; + r600->context.transfer_unmap = u_transfer_unmap_vtbl; + r600->context.transfer_destroy = u_transfer_destroy_vtbl; + r600->context.transfer_inline_write = u_transfer_inline_write_vtbl; + r600->context.is_resource_referenced = u_is_resource_referenced_vtbl; } void r600_init_screen_resource_functions(struct r600_screen *r600screen) { - r600screen->screen.resource_create = r600_resource_create; - r600screen->screen.resource_from_handle = r600_resource_from_handle; - r600screen->screen.resource_get_handle = u_resource_get_handle_vtbl; - r600screen->screen.resource_destroy = u_resource_destroy_vtbl; - r600screen->screen.user_buffer_create = r600_user_buffer_create; + r600screen->screen.resource_create = r600_resource_create; + r600screen->screen.resource_from_handle = r600_resource_from_handle; + r600screen->screen.resource_get_handle = u_resource_get_handle_vtbl; + r600screen->screen.resource_destroy = u_resource_destroy_vtbl; + r600screen->screen.user_buffer_create = r600_user_buffer_create; } diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 95084a371b..0139a3b777 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -20,14 +20,47 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - #ifndef R600_RESOURCE_H #define R600_RESOURCE_H +#include "util/u_transfer.h" + struct r600_context; struct r600_screen; +/* This gets further specialized into either buffer or texture + * structures. Use the vtbl struct to choose between the two + * underlying implementations. + */ +struct r600_resource { + struct u_resource base; + struct radeon_bo *bo; + u32 domain; + u32 flink; + struct pb_buffer *pb; +}; + +struct r600_resource_texture { + struct r600_resource resource; + unsigned long offset[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long pitch[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long stride[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long layer_size[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long stride_override; + unsigned long size; +}; + void r600_init_context_resource_functions(struct r600_context *r600); void r600_init_screen_resource_functions(struct r600_screen *r600screen); +/* r600_buffer */ +u32 r600_domain_from_usage(unsigned usage); + +/* r600_texture */ +struct pipe_resource *r600_texture_create(struct pipe_screen *screen, + const struct pipe_resource *templ); +struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, + const struct pipe_resource *base, + struct winsys_handle *whandle); + #endif diff --git a/src/gallium/drivers/r600/r600_screen.c b/src/gallium/drivers/r600/r600_screen.c index dec6fa8d27..e0d74ca558 100644 --- a/src/gallium/drivers/r600/r600_screen.c +++ b/src/gallium/drivers/r600/r600_screen.c @@ -24,15 +24,14 @@ * Jerome Glisse * Corbin Simpson */ -#include -#include -#include -#include "r600_resource.h" +#include +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/u_memory.h" #include "r600_screen.h" -#include "r600_texture.h" #include "r600_context.h" #include "r600_public.h" -#include +#include "r600_resource.h" static const char* r600_get_vendor(struct pipe_screen* pscreen) { @@ -180,64 +179,6 @@ static boolean r600_is_format_supported(struct pipe_screen* screen, return FALSE; } -struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, - struct pipe_resource *texture, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box) -{ - struct r600_texture *rtex = (struct r600_texture*)texture; - struct r600_transfer *trans; - - trans = CALLOC_STRUCT(r600_transfer); - if (trans == NULL) - return NULL; - pipe_resource_reference(&trans->transfer.resource, texture); - trans->transfer.sr = sr; - trans->transfer.usage = usage; - trans->transfer.box = *box; - trans->transfer.stride = rtex->stride[sr.level]; - trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face); - return &trans->transfer; -} - -void r600_texture_transfer_destroy(struct pipe_context *ctx, - struct pipe_transfer *trans) -{ - pipe_resource_reference(&trans->resource, NULL); - FREE(trans); -} - -void* r600_texture_transfer_map(struct pipe_context *ctx, - struct pipe_transfer* transfer) -{ - struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; - struct r600_texture *rtex = (struct r600_texture*)transfer->resource; - char *map; - enum pipe_format format = rtex->b.b.format; - - map = pipe_buffer_map(ctx, rtex->buffer, - transfer->usage, - &rtransfer->buffer_transfer); - - if (!map) { - return NULL; - } - - return map + rtransfer->offset + - transfer->box.y / util_format_get_blockheight(format) * transfer->stride + - transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); -} - -void r600_texture_transfer_unmap(struct pipe_context *ctx, - struct pipe_transfer* transfer) -{ - struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; - struct r600_texture *rtex = (struct r600_texture*)transfer->resource; - - pipe_buffer_unmap(ctx, rtex->buffer, rtransfer->buffer_transfer); -} - static void r600_destroy_screen(struct pipe_screen* pscreen) { struct r600_screen* rscreen = r600_screen(pscreen); diff --git a/src/gallium/drivers/r600/r600_screen.h b/src/gallium/drivers/r600/r600_screen.h index 7a373cd0ef..9a452ecfe3 100644 --- a/src/gallium/drivers/r600/r600_screen.h +++ b/src/gallium/drivers/r600/r600_screen.h @@ -40,14 +40,6 @@ struct r600_transfer { unsigned offset; }; -struct r600_buffer { - struct u_resource b; - struct radeon_bo *bo; - u32 domain; - u32 flink; - struct pb_buffer *pb; -}; - struct r600_screen { struct pipe_screen screen; struct radeon *rw; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 2fdcdea14e..7d67e28c06 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -24,12 +24,12 @@ * Jerome Glisse */ #include -#include -#include -#include +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/u_memory.h" #include "r600_screen.h" -#include "r600_texture.h" #include "r600_context.h" +#include "r600_resource.h" #include "r600d.h" @@ -90,8 +90,8 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, { struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_context *rctx = r600_context(ctx); - struct r600_texture *rtex; - struct r600_buffer *rbuffer; + struct r600_resource_texture *rtex; + struct r600_resource *rbuffer; struct radeon_state *rstate; unsigned level = state->cbufs[0]->level; unsigned pitch, slice; @@ -99,8 +99,8 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, rstate = radeon_state(rscreen->rw, R600_CB0_TYPE, R600_CB0); if (rstate == NULL) return; - rtex = (struct r600_texture*)state->cbufs[0]->texture; - rbuffer = (struct r600_buffer*)rtex->buffer; + rtex = (struct r600_resource_texture*)state->cbufs[0]->texture; + rbuffer = &rtex->resource; rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); rstate->bo[1] = radeon_bo_incref(rscreen->rw, rbuffer->bo); rstate->bo[2] = radeon_bo_incref(rscreen->rw, rbuffer->bo); @@ -125,8 +125,8 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, radeon_draw_set_new(rctx->draw, rstate); rctx->db = radeon_state_decref(rctx->db); if(state->zsbuf) { - rtex = (struct r600_texture*)state->zsbuf->texture; - rbuffer = (struct r600_buffer*)rtex->buffer; + rtex = (struct r600_resource_texture*)state->zsbuf->texture; + rbuffer = &rtex->resource; rctx->db = radeon_state(rscreen->rw, R600_DB_TYPE, R600_DB); if(rctx->db == NULL) return; @@ -397,8 +397,8 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_texture_resource *rtexture; const struct util_format_description *desc; - struct r600_texture *tmp; - struct r600_buffer *rbuffer; + struct r600_resource_texture *tmp; + struct r600_resource *rbuffer; unsigned format; if (r600_conv_pipe_format(texture->format, &format)) @@ -419,8 +419,8 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c pipe_resource_reference(&rtexture->view.texture, texture); rtexture->view.context = ctx; - tmp = (struct r600_texture*)texture; - rbuffer = (struct r600_buffer*)tmp->buffer; + tmp = (struct r600_resource_texture*)texture; + rbuffer = &tmp->resource; rtexture->state->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); rtexture->state->bo[1] = radeon_bo_incref(rscreen->rw, rbuffer->bo); rtexture->state->nbo = 2; diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 1c219a5579..ab20e97948 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -32,16 +32,18 @@ #include "state_tracker/drm_driver.h" #include "r600_screen.h" #include "r600_context.h" -#include "r600_texture.h" +#include "r600_resource.h" #include "r600d.h" extern struct u_resource_vtbl r600_texture_vtbl; -unsigned long r600_texture_get_offset(struct r600_texture *rtex, unsigned level, unsigned zslice, unsigned face) +static unsigned long r600_texture_get_offset(struct r600_resource_texture *rtex, + unsigned level, unsigned zslice, + unsigned face) { unsigned long offset = rtex->offset[level]; - switch (rtex->b.b.target) { + switch (rtex->resource.base.b.target) { case PIPE_TEXTURE_3D: assert(face == 0); return offset + zslice * rtex->layer_size[level]; @@ -54,9 +56,9 @@ unsigned long r600_texture_get_offset(struct r600_texture *rtex, unsigned level, } } -static void r600_setup_miptree(struct r600_screen *rscreen, struct r600_texture *rtex) +static void r600_setup_miptree(struct r600_screen *rscreen, struct r600_resource_texture *rtex) { - struct pipe_resource *ptex = &rtex->b.b; + struct pipe_resource *ptex = &rtex->resource.base.b; unsigned long w, h, stride, size, layer_size, i, offset; for (i = 0, offset = 0; i <= ptex->last_level; i++) { @@ -80,43 +82,44 @@ static void r600_setup_miptree(struct r600_screen *rscreen, struct r600_texture } struct pipe_resource *r600_texture_create(struct pipe_screen *screen, - const struct pipe_resource *templ) + const struct pipe_resource *templ) { - struct r600_texture *rtex = CALLOC_STRUCT(r600_texture); + struct r600_resource_texture *rtex; + struct r600_resource *resource; struct r600_screen *rscreen = r600_screen(screen); - struct pipe_resource templ_buf; + rtex = CALLOC_STRUCT(r600_resource_texture); if (!rtex) { return NULL; } - rtex->b.b = *templ; - rtex->b.vtbl = &r600_texture_vtbl; - pipe_reference_init(&rtex->b.b.reference, 1); - rtex->b.b.screen = screen; + resource = &rtex->resource; + resource->base.b = *templ; + resource->base.vtbl = &r600_texture_vtbl; + pipe_reference_init(&resource->base.b.reference, 1); + resource->base.b.screen = screen; r600_setup_miptree(rscreen, rtex); - memset(&templ_buf, 0, sizeof(struct pipe_resource)); - templ_buf.target = PIPE_BUFFER; - templ_buf.format = PIPE_FORMAT_R8_UNORM; - templ_buf.usage = templ->usage; - templ_buf.bind = templ->bind; - templ_buf.width0 = rtex->size; - templ_buf.height0 = 1; - templ_buf.depth0 = 1; - - rtex->buffer = screen->resource_create(screen, &templ_buf); - if (!rtex->buffer) { + /* FIXME alignment 4096 enought ? too much ? */ + resource->domain = r600_domain_from_usage(resource->base.b.bind); + resource->bo = radeon_bo(rscreen->rw, 0, rtex->size, 4096, NULL); + if (resource->bo == NULL) { FREE(rtex); return NULL; } - return &rtex->b.b; + + return &resource->base.b; } static void r600_texture_destroy(struct pipe_screen *screen, struct pipe_resource *ptex) { - struct r600_texture *rtex = (struct r600_texture*)ptex; + struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; + struct r600_resource *resource = &rtex->resource; + struct r600_screen *rscreen = r600_screen(screen); + if (resource->bo) { + radeon_bo_decref(rscreen->rw, resource->bo); + } FREE(rtex); } @@ -125,7 +128,7 @@ static struct pipe_surface *r600_get_tex_surface(struct pipe_screen *screen, unsigned face, unsigned level, unsigned zslice, unsigned flags) { - struct r600_texture *rtex = (struct r600_texture*)texture; + struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface); unsigned long offset; @@ -153,71 +156,111 @@ static void r600_tex_surface_destroy(struct pipe_surface *surface) } struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, - const struct pipe_resource *base, + const struct pipe_resource *templ, struct winsys_handle *whandle) { - struct pipe_resource *buffer; - struct r600_texture *rtex; + struct radeon *rw = (struct radeon*)screen->winsys; + struct r600_resource_texture *rtex; + struct r600_resource *resource; + struct radeon_bo *bo = NULL; - buffer = r600_buffer_from_handle(screen, whandle); - if (buffer == NULL) { + bo = radeon_bo(rw, whandle->handle, 0, 0, NULL); + if (bo == NULL) { return NULL; } /* Support only 2D textures without mipmaps */ - if (base->target != PIPE_TEXTURE_2D || base->depth0 != 1 || base->last_level != 0) + if (templ->target != PIPE_TEXTURE_2D || templ->depth0 != 1 || templ->last_level != 0) return NULL; - rtex = CALLOC_STRUCT(r600_texture); + rtex = CALLOC_STRUCT(r600_resource_texture); if (rtex == NULL) return NULL; - /* one ref already taken */ - rtex->buffer = buffer; - - rtex->b.b = *base; - rtex->b.vtbl = &r600_texture_vtbl; - pipe_reference_init(&rtex->b.b.reference, 1); - rtex->b.b.screen = screen; + resource = &rtex->resource; + resource->base.b = *templ; + resource->base.vtbl = &r600_texture_vtbl; + pipe_reference_init(&resource->base.b.reference, 1); + resource->base.b.screen = screen; + resource->bo = bo; rtex->stride_override = whandle->stride; - rtex->pitch[0] = whandle->stride / util_format_get_blocksize(base->format); + rtex->pitch[0] = whandle->stride / util_format_get_blocksize(templ->format); rtex->stride[0] = whandle->stride; rtex->offset[0] = 0; - rtex->size = align(rtex->stride[0] * base->height0, 32); + rtex->size = align(rtex->stride[0] * templ->height0, 32); - return &rtex->b.b; + return &resource->base.b; } -static boolean r600_texture_get_handle(struct pipe_screen* screen, - struct pipe_resource *texture, - struct winsys_handle *whandle) +static unsigned int r600_texture_is_referenced(struct pipe_context *context, + struct pipe_resource *texture, + unsigned face, unsigned level) { - struct r600_screen *rscreen = r600_screen(screen); - struct r600_texture* rtex = (struct r600_texture*)texture; + /* FIXME */ + return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; +} - if (!rtex) { - return FALSE; - } +struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, + struct pipe_resource *texture, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box) +{ + struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; + struct r600_transfer *trans; - whandle->stride = rtex->stride[0]; + trans = CALLOC_STRUCT(r600_transfer); + if (trans == NULL) + return NULL; + pipe_resource_reference(&trans->transfer.resource, texture); + trans->transfer.sr = sr; + trans->transfer.usage = usage; + trans->transfer.box = *box; + trans->transfer.stride = rtex->stride[sr.level]; + trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face); + return &trans->transfer; +} + +void r600_texture_transfer_destroy(struct pipe_context *ctx, + struct pipe_transfer *trans) +{ + pipe_resource_reference(&trans->resource, NULL); + FREE(trans); +} + +void* r600_texture_transfer_map(struct pipe_context *ctx, + struct pipe_transfer* transfer) +{ + struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; + struct r600_resource *resource; + enum pipe_format format = transfer->resource->format; + struct r600_screen *rscreen = r600_screen(ctx->screen); + char *map; - r600_buffer_get_handle(rscreen->rw, rtex->buffer, whandle); + resource = (struct r600_resource *)transfer->resource; + if (radeon_bo_map(rscreen->rw, resource->bo)) { + return NULL; + } + map = resource->bo->data; - return TRUE; + return map + rtransfer->offset + + transfer->box.y / util_format_get_blockheight(format) * transfer->stride + + transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); } -static unsigned int r600_texture_is_referenced(struct pipe_context *context, - struct pipe_resource *texture, - unsigned face, unsigned level) +void r600_texture_transfer_unmap(struct pipe_context *ctx, + struct pipe_transfer* transfer) { - struct r600_texture *rtex = (struct r600_texture*)texture; + struct r600_screen *rscreen = r600_screen(ctx->screen); + struct r600_resource *resource; - return r600_buffer_is_referenced_by_cs(context, rtex->buffer, face, level); + resource = (struct r600_resource *)transfer->resource; + radeon_bo_unmap(rscreen->rw, resource->bo); } struct u_resource_vtbl r600_texture_vtbl = { - r600_texture_get_handle, /* get_handle */ + u_default_resource_get_handle, /* get_handle */ r600_texture_destroy, /* resource_destroy */ r600_texture_is_referenced, /* is_resource_referenced */ r600_texture_get_transfer, /* get_transfer */ diff --git a/src/gallium/drivers/r600/r600_texture.h b/src/gallium/drivers/r600/r600_texture.h deleted file mode 100644 index 9bc08d6b04..0000000000 --- a/src/gallium/drivers/r600/r600_texture.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#ifndef R600_TEXTURE_H -#define R600_TEXTURE_H - -#include - -struct r600_texture { - struct u_resource b; - unsigned long offset[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long pitch[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long stride[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long layer_size[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long stride_override; - unsigned long size; - struct pipe_resource *buffer; -}; - -struct pipe_resource *r600_texture_create(struct pipe_screen *screen, - const struct pipe_resource *templ); -unsigned long r600_texture_get_offset(struct r600_texture *rtex, unsigned level, unsigned zslice, unsigned face); -struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, - const struct pipe_resource *base, - struct winsys_handle *whandle); -void r600_init_screen_texture_functions(struct pipe_screen *screen); - -/* This should be implemented by winsys. */ -boolean r600_buffer_get_handle(struct radeon *rw, - struct pipe_resource *buf, - struct winsys_handle *whandle); - - -#endif -- cgit v1.2.3 From 6f0f6c64596b7bbbfa96e8af6715565e37efa91e Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 28 Jul 2010 19:33:50 -0400 Subject: r600g: split pipe state creating/binding from hw state creation Split hw vs pipe states creation handling as hw states group doesn't match pipe state group exactly. Right now be dumb about that and rebuild all hw states on each draw call. More optimization on that side coming. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_blit.c | 48 +- src/gallium/drivers/r600/r600_buffer.c | 1 + src/gallium/drivers/r600/r600_context.c | 152 ++-- src/gallium/drivers/r600/r600_context.h | 147 +++- src/gallium/drivers/r600/r600_draw.c | 34 +- src/gallium/drivers/r600/r600_resource.c | 1 - src/gallium/drivers/r600/r600_shader.c | 47 +- src/gallium/drivers/r600/r600_state.c | 1273 +++++++++++++++++++----------- src/gallium/winsys/r600/drm/r600_drm.c | 4 +- 9 files changed, 1086 insertions(+), 621 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 1dcb19babc..cc37227ead 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -34,31 +34,37 @@ static void r600_blitter_save_states(struct r600_context *rctx) { - util_blitter_save_blend(rctx->blitter, - rctx->draw->state[R600_BLEND]); - util_blitter_save_depth_stencil_alpha(rctx->blitter, - rctx->draw->state[R600_DSA]); - util_blitter_save_stencil_ref(rctx->blitter, &rctx->stencil_ref); - util_blitter_save_rasterizer(rctx->blitter, - rctx->draw->state[R600_RASTERIZER]); - util_blitter_save_fragment_shader(rctx->blitter, - rctx->ps_shader); - util_blitter_save_vertex_shader(rctx->blitter, - rctx->vs_shader); - util_blitter_save_vertex_elements(rctx->blitter, - rctx->vertex_elements); - util_blitter_save_viewport(rctx->blitter, - &rctx->viewport); + util_blitter_save_blend(rctx->blitter, rctx->blend); + util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->dsa); + if (rctx->stencil_ref) { + util_blitter_save_stencil_ref(rctx->blitter, + &rctx->stencil_ref->state.stencil_ref); + } + util_blitter_save_rasterizer(rctx->blitter, rctx->rasterizer); + util_blitter_save_fragment_shader(rctx->blitter, rctx->ps_shader); + util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader); + util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_elements); + if (rctx->viewport) { + util_blitter_save_viewport(rctx->blitter, &rctx->viewport->state.viewport); + } /* XXX util_blitter_save_clip(rctx->blitter, &rctx->clip); */ util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffer, - rctx->vertex_buffer); + rctx->vertex_buffer); + + /* remove ptr so they don't get deleted */ + rctx->blend = NULL; + rctx->vs_shader = NULL; + rctx->ps_shader = NULL; + rctx->rasterizer = NULL; + rctx->dsa = NULL; + rctx->vertex_elements = NULL; } static void r600_clear(struct pipe_context *ctx, unsigned buffers, const float *rgba, double depth, unsigned stencil) { struct r600_context *rctx = r600_context(ctx); - struct pipe_framebuffer_state *fb = &rctx->fb_state; + struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; r600_blitter_save_states(rctx); util_blitter_clear(rctx->blitter, fb->width, fb->height, @@ -73,12 +79,14 @@ static void r600_clear_render_target(struct pipe_context *pipe, unsigned width, unsigned height) { struct r600_context *rctx = r600_context(pipe); + struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; r600_blitter_save_states(rctx); - util_blitter_save_framebuffer(rctx->blitter, &rctx->fb_state); + util_blitter_save_framebuffer(rctx->blitter, fb); util_blitter_clear_render_target(rctx->blitter, dst, rgba, dstx, dsty, width, height); +R600_ERR("vtx elem %p\n", rctx->vertex_elements); } static void r600_clear_depth_stencil(struct pipe_context *pipe, @@ -90,12 +98,14 @@ static void r600_clear_depth_stencil(struct pipe_context *pipe, unsigned width, unsigned height) { struct r600_context *rctx = r600_context(pipe); + struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; r600_blitter_save_states(rctx); - util_blitter_save_framebuffer(rctx->blitter, &rctx->fb_state); + util_blitter_save_framebuffer(rctx->blitter, fb); util_blitter_clear_depth_stencil(rctx->blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height); +R600_ERR("vtx elem %p\n", rctx->vertex_elements); } static void r600_resource_copy_region(struct pipe_context *pipe, diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 167d117520..7829a479c2 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -141,6 +141,7 @@ static void r600_buffer_destroy(struct pipe_screen *screen, if (rbuffer->bo) { radeon_bo_decref(rscreen->rw, rbuffer->bo); } + memset(rbuffer, 0, sizeof(struct r600_resource)); FREE(rbuffer); } diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index f2875f4380..4c7b67ea52 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -220,9 +220,9 @@ static void r600_init_config(struct r600_context *rctx) printf("num_gs_stack_entries : %d\n", num_gs_stack_entries); printf("num_es_stack_entries : %d\n", num_es_stack_entries); - rctx->config = radeon_state(rctx->rw, R600_CONFIG_TYPE, R600_CONFIG); + rctx->hw_states.config = radeon_state(rctx->rw, R600_CONFIG_TYPE, R600_CONFIG); - rctx->config->states[R600_CONFIG__SQ_CONFIG] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] = 0x00000000; switch (family) { case CHIP_RV610: case CHIP_RV620: @@ -231,75 +231,75 @@ static void r600_init_config(struct r600_context *rctx) case CHIP_RV710: break; default: - rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1); break; } - rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_DX9_CONSTS(1); - rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ALU_INST_PREFER_VECTOR(1); - rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio); - rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio); - rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio); - rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_DX9_CONSTS(1); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ALU_INST_PREFER_VECTOR(1); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio); - rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0; - rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs); - rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs); - rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0; + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs); + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs); + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0; - rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs); - rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_es_gprs); + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0; + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs); + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_es_gprs); - rctx->config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0; - rctx->config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_PS_THREADS(num_ps_threads); - rctx->config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_VS_THREADS(num_vs_threads); - rctx->config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_GS_THREADS(num_gs_threads); - rctx->config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_ES_THREADS(num_es_threads); + rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0; + rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_PS_THREADS(num_ps_threads); + rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_VS_THREADS(num_vs_threads); + rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_GS_THREADS(num_gs_threads); + rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_ES_THREADS(num_es_threads); - rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0; - rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); - rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); + rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0; + rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); + rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); - rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0; - rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); - rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); + rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0; + rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); + rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); - rctx->config->states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000; - rctx->config->states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002; - rctx->config->states[R600_CONFIG__VC_ENHANCE] = 0x00000000; - rctx->config->states[R600_CONFIG__DB_DEBUG] = 0x00000000; - rctx->config->states[R600_CONFIG__DB_WATERMARKS] = 0x00420204; - rctx->config->states[R600_CONFIG__SX_MISC] = 0x00000000; - rctx->config->states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001; - rctx->config->states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003; - rctx->config->states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_GS_MODE] = 0x00000000; - rctx->config->states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000; - rctx->config->states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001; - rctx->config->states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000; - radeon_state_pm4(rctx->config); + rctx->hw_states.config->states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000; + rctx->hw_states.config->states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002; + rctx->hw_states.config->states[R600_CONFIG__VC_ENHANCE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__DB_DEBUG] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__DB_WATERMARKS] = 0x00420204; + rctx->hw_states.config->states[R600_CONFIG__SX_MISC] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001; + rctx->hw_states.config->states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003; + rctx->hw_states.config->states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GS_MODE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000; + rctx->hw_states.config->states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001; + rctx->hw_states.config->states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000; + radeon_state_pm4(rctx->hw_states.config); } struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) @@ -333,19 +333,19 @@ struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) return NULL; } - rctx->cb_cntl = radeon_state(rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); - rctx->cb_cntl->states[R600_CB_CNTL__CB_SHADER_MASK] = 0x0000000F; - rctx->cb_cntl->states[R600_CB_CNTL__CB_TARGET_MASK] = 0x0000000F; - rctx->cb_cntl->states[R600_CB_CNTL__CB_COLOR_CONTROL] = 0x00CC0000; - rctx->cb_cntl->states[R600_CB_CNTL__PA_SC_AA_CONFIG] = 0x00000000; - rctx->cb_cntl->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX] = 0x00000000; - rctx->cb_cntl->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX] = 0x00000000; - rctx->cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_CONTROL] = 0x01000000; - rctx->cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_SRC] = 0x00000000; - rctx->cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_DST] = 0x000000FF; - rctx->cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_MSK] = 0xFFFFFFFF; - rctx->cb_cntl->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF; - radeon_state_pm4(rctx->cb_cntl); + rctx->hw_states.cb_cntl = radeon_state(rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); + rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_SHADER_MASK] = 0x0000000F; + rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_TARGET_MASK] = 0x0000000F; + rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_COLOR_CONTROL] = 0x00CC0000; + rctx->hw_states.cb_cntl->states[R600_CB_CNTL__PA_SC_AA_CONFIG] = 0x00000000; + rctx->hw_states.cb_cntl->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX] = 0x00000000; + rctx->hw_states.cb_cntl->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX] = 0x00000000; + rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_CONTROL] = 0x01000000; + rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_SRC] = 0x00000000; + rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_DST] = 0x000000FF; + rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_MSK] = 0xFFFFFFFF; + rctx->hw_states.cb_cntl->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF; + radeon_state_pm4(rctx->hw_states.cb_cntl); r600_init_config(rctx); diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index 30f33f757e..1f03b202ee 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -34,21 +34,74 @@ #include "r600_shader.h" /* XXX move this to a more appropriate place */ -struct r600_vertex_elements_state -{ - unsigned count; - struct pipe_vertex_element elements[32]; +union pipe_states { + struct pipe_rasterizer_state rasterizer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct pipe_clip_state clip; + struct pipe_shader_state shader; + struct pipe_depth_state depth; + struct pipe_stencil_state stencil; + struct pipe_alpha_state alpha; + struct pipe_depth_stencil_alpha_state dsa; + struct pipe_blend_state blend; + struct pipe_blend_color blend_color; + struct pipe_stencil_ref stencil_ref; + struct pipe_framebuffer_state framebuffer; + struct pipe_sampler_state sampler; + struct pipe_sampler_view sampler_view; + struct pipe_viewport_state viewport; }; -struct r600_pipe_shader { - struct r600_shader shader; - struct radeon_bo *bo; - struct radeon_state *state; +enum pipe_state_type { + pipe_rasterizer_type = 1, + pipe_poly_stipple_type, + pipe_scissor_type, + pipe_clip_type, + pipe_shader_type, + pipe_depth_type, + pipe_stencil_type, + pipe_alpha_type, + pipe_dsa_type, + pipe_blend_type, + pipe_stencil_ref_type, + pipe_framebuffer_type, + pipe_sampler_type, + pipe_sampler_view_type, + pipe_viewport_type, + pipe_type_count }; -struct r600_texture_resource { - struct pipe_sampler_view view; - struct radeon_state *state; +struct r600_context_state { + union pipe_states state; + unsigned refcount; + unsigned type; + struct radeon_state *rstate; + struct r600_shader shader; + struct radeon_bo *bo; +}; + +struct r600_vertex_element +{ + unsigned refcount; + unsigned count; + struct pipe_vertex_element elements[32]; +}; + +struct r600_context_hw_states { + struct radeon_state *rasterizer; + struct radeon_state *scissor; + struct radeon_state *dsa; + struct radeon_state *blend; + struct radeon_state *viewport; + struct radeon_state *cb0; + struct radeon_state *config; + struct radeon_state *cb_cntl; + struct radeon_state *db; + unsigned ps_nresource; + unsigned ps_nsampler; + struct radeon_state *ps_resource[160]; + struct radeon_state *ps_sampler[16]; }; struct r600_context { @@ -56,9 +109,11 @@ struct r600_context { struct r600_screen *screen; struct radeon *rw; struct radeon_ctx *ctx; - struct radeon_state *cb_cntl; - struct radeon_state *db; - struct radeon_state *config; + struct blitter_context *blitter; + struct radeon_draw *draw; + /* hw states */ + struct r600_context_hw_states hw_states; +#if 0 struct r600_pipe_shader *ps_shader; struct r600_pipe_shader *vs_shader; unsigned nps_sampler; @@ -71,12 +126,57 @@ struct r600_context { unsigned nvertex_buffer; struct r600_vertex_elements_state *vertex_elements; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - struct blitter_context *blitter; struct pipe_stencil_ref stencil_ref; struct pipe_framebuffer_state fb_state; - struct radeon_draw *draw; struct pipe_viewport_state viewport; +#endif + /* pipe states */ + unsigned flat_shade; + unsigned ps_nsampler; + unsigned vs_nsampler; + unsigned ps_nsampler_view; + unsigned vs_nsampler_view; + unsigned nvertex_buffer; + struct r600_context_state *rasterizer; + struct r600_context_state *poly_stipple; + struct r600_context_state *scissor; + struct r600_context_state *clip; + struct r600_context_state *ps_shader; + struct r600_context_state *vs_shader; + struct r600_context_state *depth; + struct r600_context_state *stencil; + struct r600_context_state *alpha; + struct r600_context_state *dsa; + struct r600_context_state *blend; + struct r600_context_state *stencil_ref; + struct r600_context_state *viewport; + struct r600_context_state *framebuffer; + struct r600_context_state *ps_sampler[PIPE_MAX_ATTRIBS]; + struct r600_context_state *vs_sampler[PIPE_MAX_ATTRIBS]; + struct r600_context_state *ps_sampler_view[PIPE_MAX_ATTRIBS]; + struct r600_context_state *vs_sampler_view[PIPE_MAX_ATTRIBS]; + struct r600_vertex_element *vertex_elements; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; +}; + +#if 0 +struct r600_vertex_elements_state +{ + unsigned count; + struct pipe_vertex_element elements[32]; +}; + +struct r600_pipe_shader { + struct r600_shader shader; + struct radeon_bo *bo; + struct radeon_state *state; +}; + +struct r600_texture_resource { + struct pipe_sampler_view view; + struct radeon_state *state; }; +#endif /* Convenience cast wrapper. */ static INLINE struct r600_context *r600_context(struct pipe_context *pipe) @@ -84,6 +184,12 @@ static INLINE struct r600_context *r600_context(struct pipe_context *pipe) return (struct r600_context*)pipe; } +struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigned type, const void *state); +struct r600_context_state *r600_context_state_incref(struct r600_context_state *rstate); +struct r600_context_state *r600_context_state_decref(struct r600_context_state *rstate); + +int r600_context_hw_states(struct r600_context *rctx); + void r600_draw_arrays(struct pipe_context *ctx, unsigned mode, unsigned start, unsigned count); void r600_draw_elements(struct pipe_context *ctx, @@ -101,10 +207,11 @@ void r600_init_state_functions(struct r600_context *rctx); void r600_init_query_functions(struct r600_context* rctx); struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv); -void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *rpshader); -struct r600_pipe_shader *r600_pipe_shader_create(struct pipe_context *ctx, - const struct tgsi_token *tokens); -int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rpshader); +int r600_pipe_shader_create(struct pipe_context *ctx, + struct r600_context_state *rstate, + const struct tgsi_token *tokens); +int r600_pipe_shader_update(struct pipe_context *ctx, + struct r600_context_state *rstate); #define R600_ERR(fmt, args...) \ fprintf(stderr, "EE %s/%s:%d - "fmt, __FILE__, __func__, __LINE__, ##args) diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c index 8e9d11b855..b248beaf8c 100644 --- a/src/gallium/drivers/r600/r600_draw.c +++ b/src/gallium/drivers/r600/r600_draw.c @@ -55,8 +55,12 @@ static int r600_draw_common(struct r600_draw *draw) struct r600_resource *rbuffer; unsigned i, j, offset, format, prim; u32 vgt_dma_index_type, vgt_draw_initiator; + struct pipe_vertex_buffer *vertex_buffer; int r; + r = r600_context_hw_states(rctx); + if (r) + return r; switch (draw->index_size) { case 2: vgt_draw_initiator = 0; @@ -84,26 +88,18 @@ static int r600_draw_common(struct r600_draw *draw) r = r600_pipe_shader_update(draw->ctx, rctx->ps_shader); if (r) return r; - r = radeon_draw_set(rctx->draw, rctx->vs_shader->state); - if (r) - return r; - r = radeon_draw_set(rctx->draw, rctx->ps_shader->state); - if (r) - return r; - r = radeon_draw_set(rctx->draw, rctx->cb_cntl); - if (r) - return r; - r = radeon_draw_set(rctx->draw, rctx->db); + r = radeon_draw_set(rctx->draw, rctx->vs_shader->rstate); if (r) return r; - r = radeon_draw_set(rctx->draw, rctx->config); + r = radeon_draw_set(rctx->draw, rctx->ps_shader->rstate); if (r) return r; for (i = 0 ; i < rctx->vertex_elements->count; i++) { j = rctx->vertex_elements->elements[i].vertex_buffer_index; - rbuffer = (struct r600_resource*)rctx->vertex_buffer[j].buffer; - offset = rctx->vertex_elements->elements[i].src_offset + rctx->vertex_buffer[j].buffer_offset; + vertex_buffer = &rctx->vertex_buffer[j]; + rbuffer = (struct r600_resource*)vertex_buffer->buffer; + offset = rctx->vertex_elements->elements[i].src_offset + vertex_buffer->buffer_offset; r = r600_conv_pipe_format(rctx->vertex_elements->elements[i].src_format, &format); if (r) return r; @@ -114,7 +110,7 @@ static int r600_draw_common(struct r600_draw *draw) vs_resource->nbo = 1; vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = offset; vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = rbuffer->bo->size - offset; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = S_038008_STRIDE(rctx->vertex_buffer[j].stride) | + vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = S_038008_STRIDE(vertex_buffer->stride) | S_038008_DATA_FORMAT(format); vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = 0x00000000; vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD4] = 0x00000000; @@ -126,17 +122,19 @@ static int r600_draw_common(struct r600_draw *draw) if (r) return r; } +#if 0 /* setup texture sampler & resource */ - for (i = 0 ; i < rctx->nps_sampler; i++) { - r = radeon_draw_set_new(rctx->draw, rctx->ps_sampler[i]); + for (i = 0 ; i < rctx->ps_nsampler; i++) { + r = radeon_draw_set_new(rctx->draw, rctx->ps_sampler[i]->rstate); if (r) return r; } - for (i = 0 ; i < rctx->nps_view; i++) { - r = radeon_draw_set_new(rctx->draw, rctx->ps_view[i]->state); + for (i = 0 ; i < rctx->ps_nsampler_view; i++) { + r = radeon_draw_set_new(rctx->draw, rctx->ps_sampler_view[i]->rstate); if (r) return r; } +#endif /* FIXME start need to change winsys */ draw->draw = radeon_state(rscreen->rw, R600_DRAW_TYPE, R600_DRAW); if (draw->draw == NULL) diff --git a/src/gallium/drivers/r600/r600_resource.c b/src/gallium/drivers/r600/r600_resource.c index 292c5d294d..8dc411ef40 100644 --- a/src/gallium/drivers/r600/r600_resource.c +++ b/src/gallium/drivers/r600/r600_resource.c @@ -24,7 +24,6 @@ #include "r600_context.h" #include "r600_resource.h" #include "r600_screen.h" -#include "r600_texture.h" static struct pipe_resource *r600_resource_create(struct pipe_screen *screen, const struct pipe_resource *templ) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 3909c704e7..8837a7272b 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -98,43 +98,40 @@ static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shad return r600_bc_build(&shader->bc); } -struct r600_pipe_shader *r600_pipe_shader_create(struct pipe_context *ctx, - const struct tgsi_token *tokens) +int r600_pipe_shader_create(struct pipe_context *ctx, + struct r600_context_state *rpshader, + const struct tgsi_token *tokens) { struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_pipe_shader *rpshader = CALLOC_STRUCT(r600_pipe_shader); int r; fprintf(stderr, "--------------------------------------------------------------\n"); tgsi_dump(tokens, 0); if (rpshader == NULL) - return NULL; + return -ENOMEM; rpshader->shader.family = radeon_get_family(rscreen->rw); r = r600_shader_from_tgsi(tokens, &rpshader->shader); if (r) { R600_ERR("translation from TGSI failed !\n"); - goto out_err; + return r; } r = r600_bc_build(&rpshader->shader.bc); if (r) { R600_ERR("building bytecode failed !\n"); - goto out_err; + return r; } fprintf(stderr, "______________________________________________________________\n"); - return rpshader; -out_err: - free(rpshader); - return NULL; + return 0; } -static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *rpshader) +static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader) { struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_shader *rshader = &rpshader->shader; struct radeon_state *state; unsigned i, j, tmp; - rpshader->state = radeon_state_decref(rpshader->state); + rpshader->rstate = radeon_state_decref(rpshader->rstate); state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER); if (state == NULL) return -ENOMEM; @@ -150,22 +147,22 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader } state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr); - rpshader->state = state; - rpshader->state->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); - rpshader->state->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo); - rpshader->state->nbo = 2; - rpshader->state->placement[0] = RADEON_GEM_DOMAIN_GTT; + rpshader->rstate = state; + rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); + rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo); + rpshader->rstate->nbo = 2; + rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; return radeon_state_pm4(state); } -static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *rpshader) +static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader) { struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_shader *rshader = &rpshader->shader; struct radeon_state *state; unsigned i, tmp; - rpshader->state = radeon_state_decref(rpshader->state); + rpshader->rstate = radeon_state_decref(rpshader->rstate); state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER); if (state == NULL) return -ENOMEM; @@ -180,14 +177,14 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr); state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = 0x00000002; - rpshader->state = state; - rpshader->state->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); - rpshader->state->nbo = 1; - rpshader->state->placement[0] = RADEON_GEM_DOMAIN_GTT; + rpshader->rstate = state; + rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); + rpshader->rstate->nbo = 1; + rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; return radeon_state_pm4(state); } -static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *rpshader) +static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader) { struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_context *rctx = r600_context(ctx); @@ -221,7 +218,7 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *r return r; } -int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rpshader) +int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader) { struct r600_context *rctx = r600_context(ctx); int r; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 7d67e28c06..0f1e1cd761 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -32,18 +32,557 @@ #include "r600_resource.h" #include "r600d.h" +static void *r600_create_blend_state(struct pipe_context *ctx, + const struct pipe_blend_state *state) +{ + struct r600_context *rctx = r600_context(ctx); + + return r600_context_state(rctx, pipe_blend_type, state); +} + +static void *r600_create_dsa_state(struct pipe_context *ctx, + const struct pipe_depth_stencil_alpha_state *state) +{ + struct r600_context *rctx = r600_context(ctx); + + return r600_context_state(rctx, pipe_dsa_type, state); +} + +static void *r600_create_rs_state(struct pipe_context *ctx, + const struct pipe_rasterizer_state *state) +{ + struct r600_context *rctx = r600_context(ctx); + + return r600_context_state(rctx, pipe_rasterizer_type, state); +} + +static void *r600_create_sampler_state(struct pipe_context *ctx, + const struct pipe_sampler_state *state) +{ + struct r600_context *rctx = r600_context(ctx); + + return r600_context_state(rctx, pipe_sampler_type, state); +} + +static void r600_sampler_view_destroy(struct pipe_context *ctx, + struct pipe_sampler_view *state) +{ + struct r600_context_state *rstate = (struct r600_context_state *)state; + + r600_context_state_decref(rstate); +} + +static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *ctx, + struct pipe_resource *texture, + const struct pipe_sampler_view *state) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_context_state *rstate; + + rstate = r600_context_state(rctx, pipe_sampler_type, state); + pipe_reference(NULL, &texture->reference); + rstate->state.sampler_view.texture = texture; + rstate->state.sampler_view.reference.count = 1; + return &rstate->state.sampler_view; +} + +static void *r600_create_shader_state(struct pipe_context *ctx, + const struct pipe_shader_state *state) +{ + struct r600_context *rctx = r600_context(ctx); + + return r600_context_state(rctx, pipe_shader_type, state); +} + +static void *r600_create_vertex_elements(struct pipe_context *ctx, + unsigned count, + const struct pipe_vertex_element *elements) +{ + struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element); + + assert(count < 32); + v->count = count; + memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); + v->refcount = 1; + return v; +} + +static void r600_bind_state(struct pipe_context *ctx, void *state) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_context_state *rstate = (struct r600_context_state *)state; + + if (state == NULL) + return; + switch (rstate->type) { + case pipe_rasterizer_type: + rctx->rasterizer = r600_context_state_decref(rctx->rasterizer); + rctx->rasterizer = r600_context_state_incref(rstate); + break; + case pipe_poly_stipple_type: + rctx->poly_stipple = r600_context_state_decref(rctx->poly_stipple); + rctx->poly_stipple = r600_context_state_incref(rstate); + break; + case pipe_scissor_type: + rctx->scissor = r600_context_state_decref(rctx->scissor); + rctx->scissor = r600_context_state_incref(rstate); + break; + case pipe_clip_type: + rctx->clip = r600_context_state_decref(rctx->clip); + rctx->clip = r600_context_state_incref(rstate); + break; + case pipe_depth_type: + rctx->depth = r600_context_state_decref(rctx->depth); + rctx->depth = r600_context_state_incref(rstate); + break; + case pipe_stencil_type: + rctx->stencil = r600_context_state_decref(rctx->stencil); + rctx->stencil = r600_context_state_incref(rstate); + break; + case pipe_alpha_type: + rctx->alpha = r600_context_state_decref(rctx->alpha); + rctx->alpha = r600_context_state_incref(rstate); + break; + case pipe_dsa_type: + rctx->dsa = r600_context_state_decref(rctx->dsa); + rctx->dsa = r600_context_state_incref(rstate); + break; + case pipe_blend_type: + rctx->blend = r600_context_state_decref(rctx->blend); + rctx->blend = r600_context_state_incref(rstate); + break; + case pipe_framebuffer_type: + rctx->framebuffer = r600_context_state_decref(rctx->framebuffer); + rctx->framebuffer = r600_context_state_incref(rstate); + break; + case pipe_stencil_ref_type: + rctx->stencil_ref = r600_context_state_decref(rctx->stencil_ref); + rctx->stencil_ref = r600_context_state_incref(rstate); + break; + case pipe_viewport_type: + rctx->viewport = r600_context_state_decref(rctx->viewport); + rctx->viewport = r600_context_state_incref(rstate); + break; + case pipe_shader_type: + case pipe_sampler_type: + case pipe_sampler_view_type: + default: + R600_ERR("invalid type %d\n", rstate->type); + return; + } +} + +static void r600_bind_ps_shader(struct pipe_context *ctx, void *state) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_context_state *rstate = (struct r600_context_state *)state; + + rctx->ps_shader = r600_context_state_decref(rctx->ps_shader); + rctx->ps_shader = r600_context_state_incref(rstate); +} + +static void r600_bind_vs_shader(struct pipe_context *ctx, void *state) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_context_state *rstate = (struct r600_context_state *)state; + + rctx->vs_shader = r600_context_state_decref(rctx->vs_shader); + rctx->vs_shader = r600_context_state_incref(rstate); +} + +static void r600_delete_vertex_element(struct pipe_context *ctx, void *state) +{ + struct r600_vertex_element *v = (struct r600_vertex_element*)state; + + if (v == NULL) + return; + if (--v->refcount) + return; + free(v); +} + +static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_vertex_element *v = (struct r600_vertex_element*)state; + + r600_delete_vertex_element(ctx, rctx->vertex_elements); + rctx->vertex_elements = v; + if (v) { + v->refcount++; + } +} + +static void r600_bind_ps_sampler(struct pipe_context *ctx, + unsigned count, void **states) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_context_state *rstate; + unsigned i; + + for (i = 0; i < rctx->ps_nsampler; i++) { + rctx->ps_sampler[i] = r600_context_state_decref(rctx->ps_sampler[i]); + } + for (i = 0; i < count; i++) { + rstate = (struct r600_context_state *)states[i]; + rctx->ps_sampler[i] = r600_context_state_incref(rstate); + } + rctx->ps_nsampler = count; +} + +static void r600_bind_vs_sampler(struct pipe_context *ctx, + unsigned count, void **states) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_context_state *rstate; + unsigned i; + + for (i = 0; i < rctx->vs_nsampler; i++) { + rctx->vs_sampler[i] = r600_context_state_decref(rctx->vs_sampler[i]); + } + for (i = 0; i < count; i++) { + rstate = (struct r600_context_state *)states[i]; + rctx->vs_sampler[i] = r600_context_state_incref(rstate); + } + rctx->vs_nsampler = count; +} static void r600_delete_state(struct pipe_context *ctx, void *state) { - struct radeon_state *rstate = state; + struct r600_context_state *rstate = (struct r600_context_state *)state; - radeon_state_decref(rstate); + r600_context_state_decref(rstate); } -static void *r600_create_blend_state(struct pipe_context *ctx, - const struct pipe_blend_state *state) +static void r600_set_blend_color(struct pipe_context *ctx, + const struct pipe_blend_color *color) +{ +} + +static void r600_set_clip_state(struct pipe_context *ctx, + const struct pipe_clip_state *state) +{ +} + +static void r600_set_constant_buffer(struct pipe_context *ctx, + uint shader, uint index, + struct pipe_resource *buffer) { struct r600_screen *rscreen = r600_screen(ctx->screen); + struct r600_context *rctx = r600_context(ctx); + unsigned nconstant = 0, i, type, id; + struct radeon_state *rstate; + struct pipe_transfer *transfer; + u32 *ptr; + + switch (shader) { + case PIPE_SHADER_VERTEX: + id = R600_VS_CONSTANT; + type = R600_VS_CONSTANT_TYPE; + break; + case PIPE_SHADER_FRAGMENT: + id = R600_PS_CONSTANT; + type = R600_PS_CONSTANT_TYPE; + break; + default: + fprintf(stderr, "%s:%d unsupported %d\n", __func__, __LINE__, shader); + return; + } + if (buffer && buffer->width0 > 0) { + nconstant = buffer->width0 / 16; + ptr = pipe_buffer_map(ctx, buffer, PIPE_TRANSFER_READ, &transfer); + if (ptr == NULL) + return; + for (i = 0; i < nconstant; i++) { + rstate = radeon_state(rscreen->rw, type, id + i); + if (rstate == NULL) + return; + rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT0_0] = ptr[i * 4 + 0]; + rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT1_0] = ptr[i * 4 + 1]; + rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT2_0] = ptr[i * 4 + 2]; + rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT3_0] = ptr[i * 4 + 3]; + if (radeon_state_pm4(rstate)) + return; + if (radeon_draw_set_new(rctx->draw, rstate)) + return; + } + pipe_buffer_unmap(ctx, buffer, transfer); + } +} + +static void r600_set_ps_sampler_view(struct pipe_context *ctx, + unsigned count, + struct pipe_sampler_view **views) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_context_state *rstate; + unsigned i; + + for (i = 0; i < rctx->ps_nsampler_view; i++) { + rctx->ps_sampler_view[i] = r600_context_state_decref(rctx->ps_sampler_view[i]); + } + for (i = 0; i < count; i++) { + rstate = (struct r600_context_state *)views[i]; + rctx->ps_sampler_view[i] = r600_context_state_incref(rstate); + } + rctx->ps_nsampler_view = count; +} + +static void r600_set_vs_sampler_view(struct pipe_context *ctx, + unsigned count, + struct pipe_sampler_view **views) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_context_state *rstate; + unsigned i; + + for (i = 0; i < rctx->vs_nsampler_view; i++) { + rctx->vs_sampler_view[i] = r600_context_state_decref(rctx->vs_sampler_view[i]); + } + for (i = 0; i < count; i++) { + rstate = (struct r600_context_state *)views[i]; + rctx->vs_sampler_view[i] = r600_context_state_incref(rstate); + } + rctx->vs_nsampler_view = count; +} + +static void r600_set_framebuffer_state(struct pipe_context *ctx, + const struct pipe_framebuffer_state *state) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_context_state *rstate; + + rstate = r600_context_state(rctx, pipe_framebuffer_type, state); + r600_bind_state(ctx, rstate); +} + +static void r600_set_polygon_stipple(struct pipe_context *ctx, + const struct pipe_poly_stipple *state) +{ +} + +static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) +{ +} + +static void r600_set_scissor_state(struct pipe_context *ctx, + const struct pipe_scissor_state *state) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_context_state *rstate; + + rstate = r600_context_state(rctx, pipe_scissor_type, state); + r600_bind_state(ctx, rstate); +} + +static void r600_set_stencil_ref(struct pipe_context *ctx, + const struct pipe_stencil_ref *state) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_context_state *rstate; + + rstate = r600_context_state(rctx, pipe_stencil_ref_type, state); + r600_bind_state(ctx, rstate); +} + +static void r600_set_vertex_buffers(struct pipe_context *ctx, + unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct r600_context *rctx = r600_context(ctx); + unsigned i; + + for (i = 0; i < rctx->nvertex_buffer; i++) { + pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); + } + memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); + for (i = 0; i < count; i++) { + rctx->vertex_buffer[i].buffer = NULL; + pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); + } + rctx->nvertex_buffer = count; +} + +static void r600_set_viewport_state(struct pipe_context *ctx, + const struct pipe_viewport_state *state) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_context_state *rstate; + + rstate = r600_context_state(rctx, pipe_viewport_type, state); + r600_bind_state(ctx, rstate); +} + +void r600_init_state_functions(struct r600_context *rctx) +{ + rctx->context.create_blend_state = r600_create_blend_state; + rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state; + rctx->context.create_fs_state = r600_create_shader_state; + rctx->context.create_rasterizer_state = r600_create_rs_state; + rctx->context.create_sampler_state = r600_create_sampler_state; + rctx->context.create_sampler_view = r600_create_sampler_view; + rctx->context.create_vertex_elements_state = r600_create_vertex_elements; + rctx->context.create_vs_state = r600_create_shader_state; + rctx->context.bind_blend_state = r600_bind_state; + rctx->context.bind_depth_stencil_alpha_state = r600_bind_state; + rctx->context.bind_fragment_sampler_states = r600_bind_ps_sampler; + rctx->context.bind_fs_state = r600_bind_ps_shader; + rctx->context.bind_rasterizer_state = r600_bind_state; + rctx->context.bind_vertex_elements_state = r600_bind_vertex_elements; + rctx->context.bind_vertex_sampler_states = r600_bind_vs_sampler; + rctx->context.bind_vs_state = r600_bind_vs_shader; + rctx->context.delete_blend_state = r600_delete_state; + rctx->context.delete_depth_stencil_alpha_state = r600_delete_state; + rctx->context.delete_fs_state = r600_delete_state; + rctx->context.delete_rasterizer_state = r600_delete_state; + rctx->context.delete_sampler_state = r600_delete_state; + rctx->context.delete_vertex_elements_state = r600_delete_vertex_element; + rctx->context.delete_vs_state = r600_delete_state; + rctx->context.set_blend_color = r600_set_blend_color; + rctx->context.set_clip_state = r600_set_clip_state; + rctx->context.set_constant_buffer = r600_set_constant_buffer; + rctx->context.set_fragment_sampler_views = r600_set_ps_sampler_view; + rctx->context.set_framebuffer_state = r600_set_framebuffer_state; + rctx->context.set_polygon_stipple = r600_set_polygon_stipple; + rctx->context.set_sample_mask = r600_set_sample_mask; + rctx->context.set_scissor_state = r600_set_scissor_state; + rctx->context.set_stencil_ref = r600_set_stencil_ref; + rctx->context.set_vertex_buffers = r600_set_vertex_buffers; + rctx->context.set_vertex_sampler_views = r600_set_vs_sampler_view; + rctx->context.set_viewport_state = r600_set_viewport_state; + rctx->context.sampler_view_destroy = r600_sampler_view_destroy; +} + +struct r600_context_state *r600_context_state_incref(struct r600_context_state *rstate) +{ + if (rstate == NULL) + return NULL; + rstate->refcount++; + return rstate; +} + +struct r600_context_state *r600_context_state_decref(struct r600_context_state *rstate) +{ + unsigned i; + + if (rstate == NULL) + return NULL; + if (--rstate->refcount) + return NULL; + switch (rstate->type) { + case pipe_sampler_view_type: + pipe_resource_reference(&rstate->state.sampler_view.texture, NULL); + break; + case pipe_framebuffer_type: + for (i = 0; i < rstate->state.framebuffer.nr_cbufs; i++) { + pipe_surface_reference(&rstate->state.framebuffer.cbufs[i], NULL); + } + pipe_surface_reference(&rstate->state.framebuffer.zsbuf, NULL); + break; + case pipe_viewport_type: + case pipe_depth_type: + case pipe_rasterizer_type: + case pipe_poly_stipple_type: + case pipe_scissor_type: + case pipe_clip_type: + case pipe_stencil_type: + case pipe_alpha_type: + case pipe_dsa_type: + case pipe_blend_type: + case pipe_stencil_ref_type: + case pipe_shader_type: + case pipe_sampler_type: + break; + default: + R600_ERR("invalid type %d\n", rstate->type); + return NULL; + } + radeon_state_decref(rstate->rstate); + FREE(rstate); + return NULL; +} + +struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigned type, const void *state) +{ + struct r600_context_state *rstate = CALLOC_STRUCT(r600_context_state); + const union pipe_states *states = state; + unsigned i; + int r; + + if (rstate == NULL) + return NULL; + rstate->type = type; + rstate->refcount = 1; + + switch (rstate->type) { + case pipe_sampler_view_type: + rstate->state.sampler_view = (*states).sampler_view; + rstate->state.sampler_view.texture = NULL; + break; + case pipe_framebuffer_type: + rstate->state.framebuffer = (*states).framebuffer; + for (i = 0; i < rstate->state.framebuffer.nr_cbufs; i++) { + pipe_surface_reference(&rstate->state.framebuffer.cbufs[i], + (*states).framebuffer.cbufs[i]); + } + pipe_surface_reference(&rstate->state.framebuffer.zsbuf, + (*states).framebuffer.zsbuf); + break; + case pipe_viewport_type: + rstate->state.viewport = (*states).viewport; + break; + case pipe_depth_type: + rstate->state.depth = (*states).depth; + break; + case pipe_rasterizer_type: + rstate->state.rasterizer = (*states).rasterizer; + break; + case pipe_poly_stipple_type: + rstate->state.poly_stipple = (*states).poly_stipple; + break; + case pipe_scissor_type: + rstate->state.scissor = (*states).scissor; + break; + case pipe_clip_type: + rstate->state.clip = (*states).clip; + break; + case pipe_stencil_type: + rstate->state.stencil = (*states).stencil; + break; + case pipe_alpha_type: + rstate->state.alpha = (*states).alpha; + break; + case pipe_dsa_type: + rstate->state.dsa = (*states).dsa; + break; + case pipe_blend_type: + rstate->state.blend = (*states).blend; + break; + case pipe_stencil_ref_type: + rstate->state.stencil_ref = (*states).stencil_ref; + break; + case pipe_shader_type: + rstate->state.shader = (*states).shader; + r = r600_pipe_shader_create(&rctx->context, rstate, rstate->state.shader.tokens); + if (r) { + r600_context_state_decref(rstate); + return NULL; + } + break; + case pipe_sampler_type: + rstate->state.sampler = (*states).sampler; + break; + default: + R600_ERR("invalid type %d\n", rstate->type); + FREE(rstate); + return NULL; + } + return rstate; +} + +static struct radeon_state *r600_blend(struct r600_context *rctx) +{ + struct r600_screen *rscreen = rctx->screen; struct radeon_state *rstate; rstate = radeon_state(rscreen->rw, R600_BLEND_TYPE, R600_BLEND); @@ -69,36 +608,19 @@ static void *r600_create_blend_state(struct pipe_context *ctx, return rstate; } -static void r600_bind_blend_state(struct pipe_context *ctx, void *state) +static struct radeon_state *r600_cb0(struct r600_context *rctx) { - struct r600_context *rctx = r600_context(ctx); - radeon_draw_set(rctx->draw, state); -} - -static void r600_set_blend_color(struct pipe_context *ctx, - const struct pipe_blend_color *color) -{ -} - -static void r600_set_clip_state(struct pipe_context *ctx, - const struct pipe_clip_state *state) -{ -} - -static void r600_set_framebuffer_state(struct pipe_context *ctx, - const struct pipe_framebuffer_state *state) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); + struct r600_screen *rscreen = rctx->screen; struct r600_resource_texture *rtex; struct r600_resource *rbuffer; struct radeon_state *rstate; + const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer; unsigned level = state->cbufs[0]->level; unsigned pitch, slice; rstate = radeon_state(rscreen->rw, R600_CB0_TYPE, R600_CB0); if (rstate == NULL) - return; + return NULL; rtex = (struct r600_resource_texture*)state->cbufs[0]->texture; rbuffer = &rtex->resource; rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); @@ -120,70 +642,53 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, rstate->states[R600_CB0__CB_COLOR0_MASK] = 0x00000000; if (radeon_state_pm4(rstate)) { radeon_state_decref(rstate); - return; + return NULL; } - radeon_draw_set_new(rctx->draw, rstate); - rctx->db = radeon_state_decref(rctx->db); - if(state->zsbuf) { - rtex = (struct r600_resource_texture*)state->zsbuf->texture; - rbuffer = &rtex->resource; - rctx->db = radeon_state(rscreen->rw, R600_DB_TYPE, R600_DB); - if(rctx->db == NULL) - return; - rctx->db->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); - rctx->db->nbo = 1; - rctx->db->placement[0] = RADEON_GEM_DOMAIN_VRAM; - level = state->zsbuf->level; - pitch = rtex->pitch[level] / 8 - 1; - slice = rtex->pitch[level] * state->zsbuf->height / 64 - 1; - - rctx->db->states[R600_DB__DB_DEPTH_BASE] = 0x00000000; - rctx->db->states[R600_DB__DB_DEPTH_INFO] = 0x00010006; - rctx->db->states[R600_DB__DB_DEPTH_VIEW] = 0x00000000; - rctx->db->states[R600_DB__DB_PREFETCH_LIMIT] = (state->zsbuf->height / 8) -1; - rctx->db->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) | - S_028000_SLICE_TILE_MAX(slice); - } else - rctx->db = NULL; - rctx->fb_state = *state; -} - -static void *r600_create_fs_state(struct pipe_context *ctx, - const struct pipe_shader_state *shader) -{ - return r600_pipe_shader_create(ctx, shader->tokens); -} - -static void r600_bind_fs_state(struct pipe_context *ctx, void *state) -{ - struct r600_context *rctx = r600_context(ctx); - - rctx->ps_shader = state; + return rstate; } -static void *r600_create_vs_state(struct pipe_context *ctx, - const struct pipe_shader_state *shader) +static struct radeon_state *r600_db(struct r600_context *rctx) { - return r600_pipe_shader_create(ctx, shader->tokens); -} + struct r600_screen *rscreen = rctx->screen; + struct r600_resource_texture *rtex; + struct r600_resource *rbuffer; + struct radeon_state *rstate; + const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer; + unsigned level = state->cbufs[0]->level; + unsigned pitch, slice; -static void r600_bind_vs_state(struct pipe_context *ctx, void *state) -{ - struct r600_context *rctx = r600_context(ctx); + if (state->zsbuf == NULL) + return NULL; - rctx->vs_shader = state; -} + rstate = radeon_state(rscreen->rw, R600_DB_TYPE, R600_DB); + if (rstate == NULL) + return NULL; -static void r600_set_polygon_stipple(struct pipe_context *ctx, - const struct pipe_poly_stipple *state) -{ + rtex = (struct r600_resource_texture*)state->zsbuf->texture; + rbuffer = &rtex->resource; + rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + rstate->nbo = 1; + rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM; + level = state->zsbuf->level; + pitch = rtex->pitch[level] / 8 - 1; + slice = rtex->pitch[level] * state->zsbuf->height / 64 - 1; + rstate->states[R600_DB__DB_DEPTH_BASE] = 0x00000000; + rstate->states[R600_DB__DB_DEPTH_INFO] = 0x00010006; + rstate->states[R600_DB__DB_DEPTH_VIEW] = 0x00000000; + rstate->states[R600_DB__DB_PREFETCH_LIMIT] = (state->zsbuf->height / 8) -1; + rstate->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) | + S_028000_SLICE_TILE_MAX(slice); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } -static void *r600_create_rs_state(struct pipe_context *ctx, - const struct pipe_rasterizer_state *state) +static struct radeon_state *r600_rasterizer(struct r600_context *rctx) { - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); + const struct pipe_rasterizer_state *state = &rctx->rasterizer->state.rasterizer; + struct r600_screen *rscreen = rctx->screen; struct radeon_state *rstate; rctx->flat_shade = state->flatshade; @@ -220,10 +725,102 @@ R600_ERR("flat shade with texture broke tex coord interp\n"); return rstate; } -static void r600_bind_rs_state(struct pipe_context *ctx, void *state) +static struct radeon_state *r600_scissor(struct r600_context *rctx) { - struct r600_context *rctx = r600_context(ctx); - radeon_draw_set(rctx->draw, state); + const struct pipe_scissor_state *state = &rctx->scissor->state.scissor; + struct r600_screen *rscreen = rctx->screen; + struct radeon_state *rstate; + u32 tl, br; + + tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) | S_028240_WINDOW_OFFSET_DISABLE(1); + br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); + rstate = radeon_state(rscreen->rw, R600_SCISSOR_TYPE, R600_SCISSOR); + if (rstate == NULL) + return NULL; + rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = tl; + rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = br; + rstate->states[R600_SCISSOR__PA_SC_WINDOW_OFFSET] = 0x00000000; + rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_TL] = tl; + rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_BR] = br; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_RULE] = 0x0000FFFF; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_TL] = tl; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_BR] = br; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_TL] = tl; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_BR] = br; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_TL] = tl; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_BR] = br; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_TL] = tl; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_BR] = br; + rstate->states[R600_SCISSOR__PA_SC_EDGERULE] = 0xAAAAAAAA; + rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_TL] = tl; + rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = br; + rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = tl; + rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = br; + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; +} + +static struct radeon_state *r600_viewport(struct r600_context *rctx) +{ + const struct pipe_viewport_state *state = &rctx->viewport->state.viewport; + struct r600_screen *rscreen = rctx->screen; + struct radeon_state *rstate; + + rstate = radeon_state(rscreen->rw, R600_VIEWPORT_TYPE, R600_VIEWPORT); + if (rstate == NULL) + return NULL; + rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMIN_0] = 0x00000000; + rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; + rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui(state->scale[0]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui(state->scale[1]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = fui(state->scale[2]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui(state->translate[0]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui(state->translate[1]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = fui(state->translate[2]); + rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; +} + +static struct radeon_state *r600_dsa(struct r600_context *rctx) +{ + const struct pipe_depth_stencil_alpha_state *state = &rctx->dsa->state.dsa; + struct r600_screen *rscreen = rctx->screen; + struct radeon_state *rstate; + unsigned db_depth_control; + + rstate = radeon_state(rscreen->rw, R600_DSA_TYPE, R600_DSA); + if (rstate == NULL) + return NULL; + db_depth_control = 0x00700700 | S_028800_Z_ENABLE(state->depth.enabled) | S_028800_Z_WRITE_ENABLE(state->depth.writemask) | S_028800_ZFUNC(state->depth.func); + + rstate->states[R600_DSA__DB_STENCIL_CLEAR] = 0x00000000; + rstate->states[R600_DSA__DB_DEPTH_CLEAR] = 0x3F800000; + rstate->states[R600_DSA__SX_ALPHA_TEST_CONTROL] = 0x00000000; + rstate->states[R600_DSA__DB_STENCILREFMASK] = 0xFFFFFF00; + rstate->states[R600_DSA__DB_STENCILREFMASK_BF] = 0xFFFFFF00; + rstate->states[R600_DSA__SX_ALPHA_REF] = 0x00000000; + rstate->states[R600_DSA__SPI_FOG_FUNC_SCALE] = 0x00000000; + rstate->states[R600_DSA__SPI_FOG_FUNC_BIAS] = 0x00000000; + rstate->states[R600_DSA__SPI_FOG_CNTL] = 0x00000000; + rstate->states[R600_DSA__DB_DEPTH_CONTROL] = db_depth_control; + rstate->states[R600_DSA__DB_SHADER_CONTROL] = 0x00000210; + rstate->states[R600_DSA__DB_RENDER_CONTROL] = 0x00000060; + rstate->states[R600_DSA__DB_RENDER_OVERRIDE] = 0x0000002A; + rstate->states[R600_DSA__DB_SRESULTS_COMPARE_STATE1] = 0x00000000; + rstate->states[R600_DSA__DB_PRELOAD_CONTROL] = 0x00000000; + rstate->states[R600_DSA__DB_ALPHA_TO_MASK] = 0x0000AA00; + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } static inline unsigned r600_tex_wrap(unsigned wrap) @@ -296,13 +893,14 @@ static inline unsigned r600_tex_compare(unsigned compare) } } -static void *r600_create_sampler_state(struct pipe_context *ctx, - const struct pipe_sampler_state *state) +static struct radeon_state *r600_sampler(struct r600_context *rctx, + const struct pipe_sampler_state *state, + unsigned id) { - struct r600_screen *rscreen = r600_screen(ctx->screen); + struct r600_screen *rscreen = rctx->screen; struct radeon_state *rstate; - rstate = radeon_state(rscreen->rw, R600_PS_SAMPLER_TYPE, R600_PS_SAMPLER); + rstate = radeon_state(rscreen->rw, R600_PS_SAMPLER_TYPE, id); if (rstate == NULL) return NULL; rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0] = @@ -314,33 +912,16 @@ static void *r600_create_sampler_state(struct pipe_context *ctx, S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)); /* FIXME LOD it depends on texture base level ... */ - rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0] = - S_03C004_MIN_LOD(0) | - S_03C004_MAX_LOD(0) | - S_03C004_LOD_BIAS(0); - rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0] = S_03C008_TYPE(1); - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; -} - -static void r600_bind_sampler_states(struct pipe_context *ctx, - unsigned count, void **states) -{ - struct r600_context *rctx = r600_context(ctx); - unsigned i; - - /* FIXME split VS/PS/GS sampler */ - for (i = 0; i < count; i++) { - rctx->ps_sampler[i] = radeon_state_decref(rctx->ps_sampler[i]); - } - rctx->nps_sampler = count; - for (i = 0; i < count; i++) { - rctx->ps_sampler[i] = radeon_state_incref(states[i]); - rctx->ps_sampler[i]->id = R600_PS_SAMPLER + i; + rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0] = + S_03C004_MIN_LOD(0) | + S_03C004_MAX_LOD(0) | + S_03C004_LOD_BIAS(0); + rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0] = S_03C008_TYPE(1); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; } + return rstate; } static inline unsigned r600_tex_swizzle(unsigned swizzle) @@ -390,57 +971,47 @@ static inline unsigned r600_tex_dim(unsigned dim) } } -static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *ctx, - struct pipe_resource *texture, - const struct pipe_sampler_view *view) +static struct radeon_state *r600_resource(struct r600_context *rctx, + const struct pipe_sampler_view *view, + unsigned id) { - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_texture_resource *rtexture; + struct r600_screen *rscreen = rctx->screen; const struct util_format_description *desc; struct r600_resource_texture *tmp; struct r600_resource *rbuffer; + struct radeon_state *rstate; unsigned format; - if (r600_conv_pipe_format(texture->format, &format)) + if (r600_conv_pipe_format(view->texture->format, &format)) return NULL; - rtexture = CALLOC_STRUCT(r600_texture_resource); - if (rtexture == NULL) - return NULL; - desc = util_format_description(texture->format); + desc = util_format_description(view->texture->format); assert(desc == NULL); - rtexture->state = radeon_state(rscreen->rw, R600_PS_RESOURCE_TYPE, R600_PS_RESOURCE); - if (rtexture->state == NULL) { - FREE(rtexture); + rstate = radeon_state(rscreen->rw, R600_PS_RESOURCE_TYPE, id); + if (rstate == NULL) { return NULL; } - rtexture->view = *view; - rtexture->view.reference.count = 1; - rtexture->view.texture = NULL; - pipe_resource_reference(&rtexture->view.texture, texture); - rtexture->view.context = ctx; - - tmp = (struct r600_resource_texture*)texture; + tmp = (struct r600_resource_texture*)view->texture; rbuffer = &tmp->resource; - rtexture->state->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); - rtexture->state->bo[1] = radeon_bo_incref(rscreen->rw, rbuffer->bo); - rtexture->state->nbo = 2; - rtexture->state->placement[0] = RADEON_GEM_DOMAIN_GTT; - rtexture->state->placement[1] = RADEON_GEM_DOMAIN_GTT; - rtexture->state->placement[2] = RADEON_GEM_DOMAIN_GTT; - rtexture->state->placement[3] = RADEON_GEM_DOMAIN_GTT; + rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + rstate->bo[1] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + rstate->nbo = 2; + rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; + rstate->placement[1] = RADEON_GEM_DOMAIN_GTT; + rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; + rstate->placement[3] = RADEON_GEM_DOMAIN_GTT; /* FIXME properly handle first level != 0 */ - rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = - S_038000_DIM(r600_tex_dim(texture->target)) | + rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = + S_038000_DIM(r600_tex_dim(view->texture->target)) | S_038000_PITCH((tmp->pitch[0] / 8) - 1) | - S_038000_TEX_WIDTH(texture->width0 - 1); - rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = - S_038004_TEX_HEIGHT(texture->height0 - 1) | - S_038004_TEX_DEPTH(texture->depth0 - 1) | + S_038000_TEX_WIDTH(view->texture->width0 - 1); + rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = + S_038004_TEX_HEIGHT(view->texture->height0 - 1) | + S_038004_TEX_DEPTH(view->texture->depth0 - 1) | S_038004_DATA_FORMAT(format); - rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = 0; - rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = tmp->offset[1] >> 8; - rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD4] = + rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = 0; + rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = tmp->offset[1] >> 8; + rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD4] = S_038010_FORMAT_COMP_X(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) | S_038010_FORMAT_COMP_Y(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) | S_038010_FORMAT_COMP_Z(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) | @@ -453,225 +1024,12 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c S_038010_DST_SEL_Z(r600_tex_swizzle(view->swizzle_b)) | S_038010_DST_SEL_W(r600_tex_swizzle(view->swizzle_a)) | S_038010_BASE_LEVEL(view->first_level); - rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD5] = + rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD5] = S_038014_LAST_LEVEL(view->last_level) | S_038014_BASE_ARRAY(0) | S_038014_LAST_ARRAY(0); - rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = + rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE); - return &rtexture->view; -} - -static void r600_sampler_view_destroy(struct pipe_context *ctx, - struct pipe_sampler_view *view) -{ - struct r600_texture_resource *texture; - - if (view == NULL) - return; - texture = LIST_ENTRY(struct r600_texture_resource, view, view); - radeon_state_decref(texture->state); - FREE(texture); -} - -static void r600_set_fragment_sampler_views(struct pipe_context *ctx, - unsigned count, - struct pipe_sampler_view **views) -{ - struct r600_texture_resource *rtexture; - struct r600_context *rctx = r600_context(ctx); - struct pipe_sampler_view *tmp; - unsigned i, real_num_views = 0; - - if (views == NULL) - return; - - for (i = 0; i < count; i++) { - if (views[i]) - real_num_views++; - } - - for (i = 0; i < rctx->nps_view; i++) { - tmp = &rctx->ps_view[i]->view; - pipe_sampler_view_reference(&tmp, NULL); - rctx->ps_view[i] = NULL; - } - rctx->nps_view = real_num_views; - for (i = 0; i < count; i++) { - - if (!views[i]) - continue; - rtexture = LIST_ENTRY(struct r600_texture_resource, views[i], view); - rctx->ps_view[i] = rtexture; - tmp = NULL; - pipe_sampler_view_reference(&tmp, views[i]); - rtexture->state->id = R600_PS_RESOURCE + i; - } -} - -static void r600_set_vertex_sampler_views(struct pipe_context *ctx, - unsigned count, - struct pipe_sampler_view **views) -{ - struct r600_texture_resource *rtexture; - struct r600_context *rctx = r600_context(ctx); - struct pipe_sampler_view *tmp; - unsigned i, real_num_views = 0; - - if (views == NULL) - return; - - for (i = 0; i < count; i++) { - if (views[i]) - real_num_views++; - } - for (i = 0; i < rctx->nvs_view; i++) { - tmp = &rctx->vs_view[i]->view; - pipe_sampler_view_reference(&tmp, NULL); - rctx->vs_view[i] = NULL; - } - rctx->nvs_view = real_num_views; - for (i = 0; i < count; i++) { - if (!views[i]) - continue; - rtexture = LIST_ENTRY(struct r600_texture_resource, views[i], view); - rctx->vs_view[i] = rtexture; - tmp = NULL; - pipe_sampler_view_reference(&tmp, views[i]); - rtexture->state->id = R600_VS_RESOURCE + i; - } -} - -static void r600_set_scissor_state(struct pipe_context *ctx, - const struct pipe_scissor_state *state) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - struct radeon_state *rstate; - u32 tl, br; - - tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) | S_028240_WINDOW_OFFSET_DISABLE(1); - br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); - rstate = radeon_state(rscreen->rw, R600_SCISSOR_TYPE, R600_SCISSOR); - if (rstate == NULL) - return; - rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_WINDOW_OFFSET] = 0x00000000; - rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_RULE] = 0x0000FFFF; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_EDGERULE] = 0xAAAAAAAA; - rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = br; - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return; - } - radeon_draw_set_new(rctx->draw, rstate); -} - -static void r600_set_viewport_state(struct pipe_context *ctx, - const struct pipe_viewport_state *state) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - struct radeon_state *rstate; - - rstate = radeon_state(rscreen->rw, R600_VIEWPORT_TYPE, R600_VIEWPORT); - if (rstate == NULL) - return; - rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMIN_0] = 0x00000000; - rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; - rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui(state->scale[0]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui(state->scale[1]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = fui(state->scale[2]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui(state->translate[0]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui(state->translate[1]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = fui(state->translate[2]); - rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return; - } - radeon_draw_set_new(rctx->draw, rstate); - rctx->viewport = *state; -} - -static void r600_set_vertex_buffers(struct pipe_context *ctx, - unsigned count, - const struct pipe_vertex_buffer *buffers) -{ - struct r600_context *rctx = r600_context(ctx); - - memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); - rctx->nvertex_buffer = count; -} - - -static void *r600_create_vertex_elements_state(struct pipe_context *ctx, - unsigned count, - const struct pipe_vertex_element *elements) -{ - struct r600_vertex_elements_state *v = CALLOC_STRUCT(r600_vertex_elements_state); - - assert(count < 32); - v->count = count; - memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); - return v; -} - -static void r600_bind_vertex_elements_state(struct pipe_context *ctx, void *state) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_vertex_elements_state *v = (struct r600_vertex_elements_state*)state; - - rctx->vertex_elements = v; -} - -static void r600_delete_vertex_elements_state(struct pipe_context *ctx, void *state) -{ - FREE(state); -} - -static void *r600_create_dsa_state(struct pipe_context *ctx, - const struct pipe_depth_stencil_alpha_state *state) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct radeon_state *rstate; - unsigned db_depth_control; - - rstate = radeon_state(rscreen->rw, R600_DSA_TYPE, R600_DSA); - if (rstate == NULL) - return NULL; - db_depth_control = 0x00700700 | S_028800_Z_ENABLE(state->depth.enabled) | S_028800_Z_WRITE_ENABLE(state->depth.writemask) | S_028800_ZFUNC(state->depth.func); - - rstate->states[R600_DSA__DB_STENCIL_CLEAR] = 0x00000000; - rstate->states[R600_DSA__DB_DEPTH_CLEAR] = 0x3F800000; - rstate->states[R600_DSA__SX_ALPHA_TEST_CONTROL] = 0x00000000; - rstate->states[R600_DSA__DB_STENCILREFMASK] = 0xFFFFFF00; - rstate->states[R600_DSA__DB_STENCILREFMASK_BF] = 0xFFFFFF00; - rstate->states[R600_DSA__SX_ALPHA_REF] = 0x00000000; - rstate->states[R600_DSA__SPI_FOG_FUNC_SCALE] = 0x00000000; - rstate->states[R600_DSA__SPI_FOG_FUNC_BIAS] = 0x00000000; - rstate->states[R600_DSA__SPI_FOG_CNTL] = 0x00000000; - rstate->states[R600_DSA__DB_DEPTH_CONTROL] = db_depth_control; - rstate->states[R600_DSA__DB_SHADER_CONTROL] = 0x00000210; - rstate->states[R600_DSA__DB_RENDER_CONTROL] = 0x00000060; - rstate->states[R600_DSA__DB_RENDER_OVERRIDE] = 0x0000002A; - rstate->states[R600_DSA__DB_SRESULTS_COMPARE_STATE1] = 0x00000000; - rstate->states[R600_DSA__DB_PRELOAD_CONTROL] = 0x00000000; - rstate->states[R600_DSA__DB_ALPHA_TO_MASK] = 0x0000AA00; if (radeon_state_pm4(rstate)) { radeon_state_decref(rstate); return NULL; @@ -679,105 +1037,100 @@ static void *r600_create_dsa_state(struct pipe_context *ctx, return rstate; } -static void r600_bind_dsa_state(struct pipe_context *ctx, void *state) -{ - struct r600_context *rctx = r600_context(ctx); - radeon_draw_set(rctx->draw, state); -} - -static void r600_set_constant_buffer(struct pipe_context *ctx, - uint shader, uint index, - struct pipe_resource *buffer) +int r600_context_hw_states(struct r600_context *rctx) { - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - unsigned nconstant = 0, i, type, id; - struct radeon_state *rstate; - struct pipe_transfer *transfer; - u32 *ptr; - - switch (shader) { - case PIPE_SHADER_VERTEX: - id = R600_VS_CONSTANT; - type = R600_VS_CONSTANT_TYPE; - break; - case PIPE_SHADER_FRAGMENT: - id = R600_PS_CONSTANT; - type = R600_PS_CONSTANT_TYPE; - break; - default: - fprintf(stderr, "%s:%d unsupported %d\n", __func__, __LINE__, shader); - return; + unsigned i; + int r; + + /* free previous TODO determine what need to be updated, what + * doesn't + */ + //radeon_state_decref(rctx->hw_states.config); + //radeon_state_decref(rctx->hw_states.cb_cntl); + radeon_state_decref(rctx->hw_states.db); + radeon_state_decref(rctx->hw_states.rasterizer); + radeon_state_decref(rctx->hw_states.scissor); + radeon_state_decref(rctx->hw_states.dsa); + radeon_state_decref(rctx->hw_states.blend); + radeon_state_decref(rctx->hw_states.viewport); + radeon_state_decref(rctx->hw_states.cb0); + for (i = 0; i < rctx->hw_states.ps_nresource; i++) { + radeon_state_decref(rctx->hw_states.ps_resource[i]); + rctx->hw_states.ps_resource[i] = NULL; } - if (buffer && buffer->width0 > 0) { - nconstant = buffer->width0 / 16; - ptr = pipe_buffer_map(ctx, buffer, PIPE_TRANSFER_READ, &transfer); - if (ptr == NULL) - return; - for (i = 0; i < nconstant; i++) { - rstate = radeon_state(rscreen->rw, type, id + i); - if (rstate == NULL) - return; - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT0_0] = ptr[i * 4 + 0]; - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT1_0] = ptr[i * 4 + 1]; - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT2_0] = ptr[i * 4 + 2]; - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT3_0] = ptr[i * 4 + 3]; - if (radeon_state_pm4(rstate)) - return; - if (radeon_draw_set_new(rctx->draw, rstate)) - return; + rctx->hw_states.ps_nresource = 0; + for (i = 0; i < rctx->hw_states.ps_nsampler; i++) { + radeon_state_decref(rctx->hw_states.ps_sampler[i]); + rctx->hw_states.ps_sampler[i] = NULL; + } + rctx->hw_states.ps_nsampler = 0; + + /* build new states */ + rctx->hw_states.rasterizer = r600_rasterizer(rctx); + rctx->hw_states.scissor = r600_scissor(rctx); + rctx->hw_states.dsa = r600_dsa(rctx); + rctx->hw_states.blend = r600_blend(rctx); + rctx->hw_states.viewport = r600_viewport(rctx); + rctx->hw_states.cb0 = r600_cb0(rctx); + rctx->hw_states.db = r600_db(rctx); + for (i = 0; i < rctx->ps_nsampler; i++) { + if (rctx->ps_sampler[i]) { + rctx->hw_states.ps_sampler[i] = r600_sampler(rctx, + &rctx->ps_sampler[i]->state.sampler, + R600_PS_SAMPLER + i); } - pipe_buffer_unmap(ctx, buffer, transfer); } -} - -static void r600_set_stencil_ref(struct pipe_context *ctx, - const struct pipe_stencil_ref *sr) -{ - struct r600_context *rctx = r600_context(ctx); - rctx->stencil_ref = *sr; -} - -static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) -{ -} - -void r600_init_state_functions(struct r600_context *rctx) -{ - rctx->context.set_sample_mask = r600_set_sample_mask; - rctx->context.create_blend_state = r600_create_blend_state; - rctx->context.bind_blend_state = r600_bind_blend_state; - rctx->context.delete_blend_state = r600_delete_state; - rctx->context.set_blend_color = r600_set_blend_color; - rctx->context.set_clip_state = r600_set_clip_state; - rctx->context.set_constant_buffer = r600_set_constant_buffer; - rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state; - rctx->context.bind_depth_stencil_alpha_state = r600_bind_dsa_state; - rctx->context.delete_depth_stencil_alpha_state = r600_delete_state; - rctx->context.set_framebuffer_state = r600_set_framebuffer_state; - rctx->context.create_fs_state = r600_create_fs_state; - rctx->context.bind_fs_state = r600_bind_fs_state; - rctx->context.delete_fs_state = r600_delete_state; - rctx->context.set_polygon_stipple = r600_set_polygon_stipple; - rctx->context.create_rasterizer_state = r600_create_rs_state; - rctx->context.bind_rasterizer_state = r600_bind_rs_state; - rctx->context.delete_rasterizer_state = r600_delete_state; - rctx->context.create_sampler_state = r600_create_sampler_state; - rctx->context.bind_fragment_sampler_states = r600_bind_sampler_states; - rctx->context.bind_vertex_sampler_states = r600_bind_sampler_states; - rctx->context.delete_sampler_state = r600_delete_state; - rctx->context.create_sampler_view = r600_create_sampler_view; - rctx->context.sampler_view_destroy = r600_sampler_view_destroy; - rctx->context.set_fragment_sampler_views = r600_set_fragment_sampler_views; - rctx->context.set_vertex_sampler_views = r600_set_vertex_sampler_views; - rctx->context.set_scissor_state = r600_set_scissor_state; - rctx->context.set_viewport_state = r600_set_viewport_state; - rctx->context.set_vertex_buffers = r600_set_vertex_buffers; - rctx->context.create_vertex_elements_state = r600_create_vertex_elements_state; - rctx->context.bind_vertex_elements_state = r600_bind_vertex_elements_state; - rctx->context.delete_vertex_elements_state = r600_delete_vertex_elements_state; - rctx->context.create_vs_state = r600_create_vs_state; - rctx->context.bind_vs_state = r600_bind_vs_state; - rctx->context.delete_vs_state = r600_delete_state; - rctx->context.set_stencil_ref = r600_set_stencil_ref; + rctx->hw_states.ps_nsampler = rctx->ps_nsampler; + for (i = 0; i < rctx->ps_nsampler_view; i++) { + if (rctx->ps_sampler_view[i]) { + rctx->hw_states.ps_resource[i] = r600_resource(rctx, + &rctx->ps_sampler_view[i]->state.sampler_view, + R600_PS_RESOURCE + i); + } + } + rctx->hw_states.ps_nresource = rctx->ps_nsampler_view; + + /* bind states */ + r = radeon_draw_set(rctx->draw, rctx->hw_states.db); + if (r) + return r; + r = radeon_draw_set(rctx->draw, rctx->hw_states.rasterizer); + if (r) + return r; + r = radeon_draw_set(rctx->draw, rctx->hw_states.scissor); + if (r) + return r; + r = radeon_draw_set(rctx->draw, rctx->hw_states.dsa); + if (r) + return r; + r = radeon_draw_set(rctx->draw, rctx->hw_states.blend); + if (r) + return r; + r = radeon_draw_set(rctx->draw, rctx->hw_states.viewport); + if (r) + return r; + r = radeon_draw_set(rctx->draw, rctx->hw_states.cb0); + if (r) + return r; + r = radeon_draw_set(rctx->draw, rctx->hw_states.config); + if (r) + return r; + r = radeon_draw_set(rctx->draw, rctx->hw_states.cb_cntl); + if (r) + return r; + for (i = 0; i < rctx->hw_states.ps_nresource; i++) { + if (rctx->hw_states.ps_resource[i]) { + r = radeon_draw_set(rctx->draw, rctx->hw_states.ps_resource[i]); + if (r) + return r; + } + } + for (i = 0; i < rctx->hw_states.ps_nsampler; i++) { + if (rctx->hw_states.ps_sampler[i]) { + r = radeon_draw_set(rctx->draw, rctx->hw_states.ps_sampler[i]); + if (r) + return r; + } + } + return 0; } diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index 3d87a994c1..9520792f54 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -30,7 +30,7 @@ #include "util/u_debug.h" #include "radeon_priv.h" #include "r600_screen.h" -#include "r600_texture.h" +#include "r600_resource.h" #include "r600_public.h" #include "r600_drm_public.h" #include "state_tracker/drm_driver.h" @@ -45,7 +45,7 @@ boolean r600_buffer_get_handle(struct radeon *rw, struct winsys_handle *whandle) { struct drm_gem_flink flink; - struct r600_buffer* rbuffer = (struct r600_buffer*)buf; + struct r600_resource* rbuffer = (struct r600_buffer*)buf; if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { if (!rbuffer->flink) { -- cgit v1.2.3 From 35e044ab562b65aa53f9d9d7b5885e6a887774bb Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 28 Jul 2010 19:59:38 -0400 Subject: r600g: switch btw flat/linear interpolation I am not sure how to properly handle flat shading regarding non color parameter to fragment shader. It seems we should still interpolate non color using linear interpolation and flat shade only apply to color. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_shader.c | 7 ++++++- src/gallium/drivers/r600/r600_shader.h | 1 + src/gallium/drivers/r600/r600_state.c | 2 -- 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 8837a7272b..3f1979b9cc 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -169,7 +169,10 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta for (i = 0; i < rshader->ninput; i++) { tmp = S_028644_SEMANTIC(rshader->input[i].sid); tmp |= S_028644_SEL_CENTROID(1); - tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); + if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || + rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) { + tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); + } state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp; } state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) | @@ -287,6 +290,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) i = ctx->shader->ninput++; ctx->shader->input[i].name = d->Semantic.Name; ctx->shader->input[i].sid = d->Semantic.Index; + ctx->shader->input[i].interpolate = d->Declaration.Interpolate; ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; if (ctx->type == TGSI_PROCESSOR_VERTEX) { /* turn input into fetch */ @@ -313,6 +317,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->output[i].name = d->Semantic.Name; ctx->shader->output[i].sid = d->Semantic.Index; ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; + ctx->shader->output[i].interpolate = d->Declaration.Interpolate; break; case TGSI_FILE_CONSTANT: case TGSI_FILE_TEMPORARY: diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 23b6a83b9a..ee0381e8bd 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -29,6 +29,7 @@ struct r600_shader_io { unsigned name; unsigned gpr; int sid; + unsigned interpolate; }; struct r600_shader { diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 0f1e1cd761..5b98dbe236 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -692,8 +692,6 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) struct radeon_state *rstate; rctx->flat_shade = state->flatshade; - rctx->flat_shade = 0; -R600_ERR("flat shade with texture broke tex coord interp\n"); rstate = radeon_state(rscreen->rw, R600_RASTERIZER_TYPE, R600_RASTERIZER); if (rstate == NULL) return NULL; -- cgit v1.2.3 From 42c1f27149828e5b5143f5e53ca3bd7c04a4e762 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 28 Jul 2010 20:09:15 -0400 Subject: r600g: state context ptr in sampler_view & add I8/L8 buffer format Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_helper.c | 2 ++ src/gallium/drivers/r600/r600_state.c | 1 + 2 files changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_helper.c b/src/gallium/drivers/r600/r600_helper.c index 7241ab1c17..132abf90a3 100644 --- a/src/gallium/drivers/r600/r600_helper.c +++ b/src/gallium/drivers/r600/r600_helper.c @@ -59,6 +59,8 @@ int r600_conv_pipe_format(unsigned pformat, unsigned *format) case PIPE_FORMAT_L8_UNORM: case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: + *format = V_0280A0_COLOR_8; + return 0; case PIPE_FORMAT_L16_UNORM: case PIPE_FORMAT_Z16_UNORM: case PIPE_FORMAT_Z32_UNORM: diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 5b98dbe236..ff5df855c6 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -83,6 +83,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c pipe_reference(NULL, &texture->reference); rstate->state.sampler_view.texture = texture; rstate->state.sampler_view.reference.count = 1; + rstate->state.sampler_view.context = ctx; return &rstate->state.sampler_view; } -- cgit v1.2.3 From 6d28bf917fb1d741d90fd3f05c22769376021fca Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 16 Jul 2010 04:35:58 +0800 Subject: gallium: Implement draw_vbo and set_index_buffer for all drivers. Some drivers define a generic function that is called by all drawing functions. To implement draw_vbo for such drivers, either draw_vbo calls the generic function or the prototype of the generic function is changed to match draw_vbo. Other drivers have no such generic function. draw_vbo is implemented by calling either draw_arrays and draw_elements. For most drivers, set_index_buffer does not mark the state dirty for tracking. Instead, the index buffer state is emitted whenever draw_vbo is called, just like the case with draw_elements. It surely can be improved. --- src/gallium/auxiliary/util/u_draw_quad.h | 30 +++++ src/gallium/drivers/cell/ppu/cell_context.h | 1 + src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 69 ++++++++--- src/gallium/drivers/cell/ppu/cell_state_vertex.c | 16 +++ src/gallium/drivers/failover/fo_context.c | 61 ++++++---- src/gallium/drivers/failover/fo_context.h | 2 + src/gallium/drivers/failover/fo_state.c | 18 +++ src/gallium/drivers/failover/fo_state_emit.c | 5 + src/gallium/drivers/galahad/glhd_context.c | 47 +++++++ src/gallium/drivers/i915/i915_context.c | 70 ++++++++--- src/gallium/drivers/i915/i915_context.h | 1 + src/gallium/drivers/i915/i915_state.c | 14 +++ src/gallium/drivers/i965/brw_context.h | 1 + src/gallium/drivers/i965/brw_draw.c | 87 ++++++++----- src/gallium/drivers/i965/brw_draw_upload.c | 7 +- src/gallium/drivers/i965/brw_pipe_vertex.c | 31 +++++ src/gallium/drivers/identity/id_context.c | 30 +++++ src/gallium/drivers/llvmpipe/lp_context.h | 1 + src/gallium/drivers/llvmpipe/lp_draw_arrays.c | 90 ++++++++++---- src/gallium/drivers/llvmpipe/lp_state_vertex.c | 14 +++ src/gallium/drivers/nv50/nv50_context.c | 1 + src/gallium/drivers/nv50/nv50_context.h | 3 + src/gallium/drivers/nv50/nv50_state.c | 15 +++ src/gallium/drivers/nv50/nv50_vbo.c | 31 +++++ src/gallium/drivers/nvfx/nvfx_context.c | 1 + src/gallium/drivers/nvfx/nvfx_context.h | 5 +- src/gallium/drivers/nvfx/nvfx_state.c | 15 +++ src/gallium/drivers/nvfx/nvfx_vbo.c | 39 +++++- src/gallium/drivers/r300/r300_context.h | 2 + src/gallium/drivers/r300/r300_render.c | 142 ++++++++++++++-------- src/gallium/drivers/r300/r300_render_stencilref.c | 22 ++++ src/gallium/drivers/r300/r300_state.c | 18 +++ src/gallium/drivers/r600/r600_context.c | 1 + src/gallium/drivers/r600/r600_context.h | 3 + src/gallium/drivers/r600/r600_draw.c | 27 ++++ src/gallium/drivers/r600/r600_state.c | 18 +++ src/gallium/drivers/rbug/rbug_context.c | 34 ++++++ src/gallium/drivers/softpipe/sp_context.c | 2 + src/gallium/drivers/softpipe/sp_context.h | 1 + src/gallium/drivers/softpipe/sp_draw_arrays.c | 92 +++++++++----- src/gallium/drivers/softpipe/sp_state.h | 7 ++ src/gallium/drivers/softpipe/sp_state_vertex.c | 14 +++ src/gallium/drivers/svga/svga_context.h | 1 + src/gallium/drivers/svga/svga_pipe_draw.c | 24 ++++ src/gallium/drivers/svga/svga_pipe_vertex.c | 19 +++ src/gallium/drivers/trace/tr_context.c | 52 ++++++++ src/gallium/drivers/trace/tr_dump_state.c | 20 +++ src/gallium/drivers/trace/tr_dump_state.h | 2 + 48 files changed, 1004 insertions(+), 202 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_draw_quad.h b/src/gallium/auxiliary/util/u_draw_quad.h index 42eb184428..1c9f752611 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.h +++ b/src/gallium/auxiliary/util/u_draw_quad.h @@ -29,12 +29,42 @@ #define U_DRAWQUAD_H +#include "pipe/p_compiler.h" +#include "pipe/p_context.h" + + #ifdef __cplusplus extern "C" { #endif struct pipe_resource; + +static INLINE void +util_draw_init_info(struct pipe_draw_info *info) +{ + memset(info, 0, sizeof(*info)); + info->instance_count = 1; + info->max_index = 0xffffffff; +} + + +static INLINE void +util_draw_arrays(struct pipe_context *pipe, uint mode, uint start, uint count) +{ + struct pipe_draw_info info; + + util_draw_init_info(&info); + info.mode = mode; + info.start = start; + info.count = count; + info.min_index = start; + info.max_index = start + count - 1; + + pipe->draw_vbo(pipe, &info); +} + + extern void util_draw_vertex_buffer(struct pipe_context *pipe, struct pipe_resource *vbuf, uint offset, diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index dc46e59a2d..d1aee62ba1 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -132,6 +132,7 @@ struct cell_context struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; uint num_vertex_buffers; + struct pipe_index_buffer index_buffer; ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS]; ubyte *zsbuf_map; diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 6a1e4d8a64..e06226fbfe 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -34,6 +34,7 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" #include "util/u_inlines.h" +#include "util/u_draw_quad.h" #include "cell_context.h" #include "cell_draw_arrays.h" @@ -56,16 +57,11 @@ * XXX should the element buffer be specified/bound with a separate function? */ static void -cell_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned min_index, - unsigned max_index, - unsigned mode, unsigned start, unsigned count) +cell_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { struct cell_context *cell = cell_context(pipe); struct draw_context *draw = cell->draw; + void *mapped_indices = NULL; unsigned i; if (cell->dirty) @@ -83,18 +79,20 @@ cell_draw_range_elements(struct pipe_context *pipe, draw_set_mapped_vertex_buffer(draw, i, buf); } /* Map index buffer, if present */ - if (indexBuffer) { - void *mapped_indexes = cell_resource(indexBuffer)->data; - draw_set_mapped_element_buffer(draw, indexSize, indexBias, mapped_indexes); - } - else { - /* no index/element buffer */ - draw_set_mapped_element_buffer(draw, 0, 0, NULL); + if (info->indexed && cell->index_buffer.buffer) { + mapped_indices = cell_resource(cell->index_buffer.buffer)->data; + mapped_indices += cell->index_buffer.offset; } + draw_set_mapped_element_buffer_range(draw, (mapped_indices) ? + lp->index_buffer.index_size : 0, + info->index_bias, + info->min_index, + info->max_index, + mapped_indices); /* draw! */ - draw_arrays(draw, mode, start, count); + draw_arrays(draw, info->mode, info->start, info->count); /* * unmap vertex/index buffers - will cause draw module to flush @@ -102,7 +100,7 @@ cell_draw_range_elements(struct pipe_context *pipe, for (i = 0; i < cell->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); } - if (indexBuffer) { + if (mapped_indices) { draw_set_mapped_element_buffer(draw, 0, 0, NULL); } @@ -115,6 +113,44 @@ cell_draw_range_elements(struct pipe_context *pipe, } +static void +cell_draw_range_elements(struct pipe_context *pipe, + struct pipe_resource *indexBuffer, + unsigned indexSize, + int indexBias, + unsigned min_index, + unsigned max_index, + unsigned mode, unsigned start, unsigned count) +{ + struct cell_context *cell = cell_context(pipe); + struct pipe_draw_info info; + struct pipe_index_buffer saved_ib, ib; + + util_draw_init_info(&info); + info.mode = mode; + info.start = start; + info.count = count; + info.index_bias = indexBias; + info.min_index = min_index; + info.max_index = max_index; + + if (indexBuffer) { + info.indexed = TRUE; + saved_ib = cell->index_buffer; + + ib.buffer = indexBuffer; + ib.offset = 0; + ib.index_size = indexSize; + pipe->set_index_buffer(pipe, &ib); + } + + cell_draw_vbo(pipe, &info); + + if (indexBuffer) + pipe->set_index_buffer(pipe, &saved_ib); +} + + static void cell_draw_elements(struct pipe_context *pipe, struct pipe_resource *indexBuffer, @@ -142,5 +178,6 @@ cell_init_draw_functions(struct cell_context *cell) cell->pipe.draw_arrays = cell_draw_arrays; cell->pipe.draw_elements = cell_draw_elements; cell->pipe.draw_range_elements = cell_draw_range_elements; + cell->pipe.draw_vbo = cell_draw_vbo; } diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c index 69152b6cbf..4e3701cd0a 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -91,10 +91,26 @@ cell_set_vertex_buffers(struct pipe_context *pipe, } +static void +cell_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct cell_context *cell = cell_context(pipe); + + if (ib) + memcpy(&cell->index_buffer, ib, sizeof(cell->index_buffer)); + else + memset(&cell->index_buffer, 0, sizeof(cell->index_buffer)); + + /* TODO make this more like a state */ +} + + void cell_init_vertex_functions(struct cell_context *cell) { cell->pipe.set_vertex_buffers = cell_set_vertex_buffers; + cell->pipe.set_index_buffer = cell_set_index_buffer; cell->pipe.create_vertex_elements_state = cell_create_vertex_elements_state; cell->pipe.bind_vertex_elements_state = cell_bind_vertex_elements_state; cell->pipe.delete_vertex_elements_state = cell_delete_vertex_elements_state; diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index 9c9c1bdc45..1048d58313 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -28,6 +28,7 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" +#include "util/u_draw_quad.h" #include "pipe/p_context.h" #include "fo_context.h" @@ -50,13 +51,8 @@ void failover_fail_over( struct failover_context *failover ) } -static void failover_draw_elements( struct pipe_context *pipe, - struct pipe_resource *indexResource, - unsigned indexSize, - int indexBias, - unsigned prim, - unsigned start, - unsigned count) +static void failover_draw_vbo( struct pipe_context *pipe, + const struct pipe_draw_info *info) { struct failover_context *failover = failover_context( pipe ); @@ -70,13 +66,7 @@ static void failover_draw_elements( struct pipe_context *pipe, /* Try hardware: */ if (failover->mode == FO_HW) { - failover->hw->draw_elements( failover->hw, - indexResource, - indexSize, - indexBias, - prim, - start, - count ); + failover->hw->draw_vbo( failover->hw, info ); } /* Possibly try software: @@ -88,13 +78,7 @@ static void failover_draw_elements( struct pipe_context *pipe, failover_state_emit( failover ); } - failover->sw->draw_elements( failover->sw, - indexResource, - indexSize, - indexBias, - prim, - start, - count ); + failover->sw->draw_vbo( failover->sw, info ); /* Be ready to switch back to hardware rendering without an * intervening flush. Unlikely to be much performance impact to @@ -105,6 +89,40 @@ static void failover_draw_elements( struct pipe_context *pipe, } +static void failover_draw_elements( struct pipe_context *pipe, + struct pipe_resource *indexResource, + unsigned indexSize, + int indexBias, + unsigned prim, + unsigned start, + unsigned count) +{ + struct failover_context *failover = failover_context( pipe ); + struct pipe_draw_info info; + struct pipe_index_buffer saved_ib, ib; + + util_draw_init_info(&info); + info.mode = prim; + info.start = start; + info.count = count; + + if (indexResource) { + info.indexed = TRUE; + saved_ib = failover->index_buffer; + + ib.buffer = indexResource; + ib.offset = 0; + ib.index_size = indexSize; + pipe->set_index_buffer(pipe, &ib); + } + + failover_draw_vbo(pipe, &info); + + if (indexResource) + pipe->set_index_buffer(pipe, &saved_ib); +} + + static void failover_draw_arrays( struct pipe_context *pipe, unsigned prim, unsigned start, unsigned count) { @@ -145,6 +163,7 @@ struct pipe_context *failover_create( struct pipe_context *hw, failover->pipe.draw_arrays = failover_draw_arrays; failover->pipe.draw_elements = failover_draw_elements; + failover->pipe.draw_vbo = failover_draw_vbo; failover->pipe.clear = hw->clear; failover->pipe.clear_render_target = hw->clear_render_target; failover->pipe.clear_depth_stencil = hw->clear_depth_stencil; diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h index 9d3e0d0dba..1afa6c9cee 100644 --- a/src/gallium/drivers/failover/fo_context.h +++ b/src/gallium/drivers/failover/fo_context.h @@ -56,6 +56,7 @@ #define FO_NEW_VERTEX_BUFFER 0x40000 #define FO_NEW_VERTEX_ELEMENT 0x80000 #define FO_NEW_SAMPLE_MASK 0x100000 +#define FO_NEW_INDEX_BUFFER 0x200000 @@ -97,6 +98,7 @@ struct failover_context { struct pipe_scissor_state scissor; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; + struct pipe_index_buffer index_buffer; uint num_vertex_buffers; diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c index 12e42379f9..c265f381b6 100644 --- a/src/gallium/drivers/failover/fo_state.c +++ b/src/gallium/drivers/failover/fo_state.c @@ -583,6 +583,23 @@ failover_set_vertex_buffers(struct pipe_context *pipe, } +static void +failover_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct failover_context *failover = failover_context(pipe); + + if (ib) + memcpy(&failover->index_buffer, ib, sizeof(failover->index_buffer)); + else + memset(&failover->index_buffer, 0, sizeof(failover->index_buffer)); + + failover->dirty |= FO_NEW_INDEX_BUFFER; + failover->sw->set_index_buffer( failover->sw, ib ); + failover->hw->set_index_buffer( failover->hw, ib ); +} + + void failover_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, @@ -635,6 +652,7 @@ failover_init_state_functions( struct failover_context *failover ) failover->pipe.set_vertex_sampler_views = failover_set_vertex_sampler_views; failover->pipe.set_viewport_state = failover_set_viewport_state; failover->pipe.set_vertex_buffers = failover_set_vertex_buffers; + failover->pipe.set_index_buffer = failover_set_index_buffer; failover->pipe.set_constant_buffer = failover_set_constant_buffer; failover->pipe.create_sampler_view = failover_create_sampler_view; failover->pipe.sampler_view_destroy = failover_sampler_view_destroy; diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c index 147f23269c..7f434ff9d6 100644 --- a/src/gallium/drivers/failover/fo_state_emit.c +++ b/src/gallium/drivers/failover/fo_state_emit.c @@ -135,5 +135,10 @@ failover_state_emit( struct failover_context *failover ) failover->vertex_buffers ); } + if (failover->dirty & FO_NEW_INDEX_BUFFER) { + failover->sw->set_index_buffer( failover->sw, + &failover->index_buffer ); + } + failover->dirty = 0; } diff --git a/src/gallium/drivers/galahad/glhd_context.c b/src/gallium/drivers/galahad/glhd_context.c index ab6f17b3ab..6473f2d499 100644 --- a/src/gallium/drivers/galahad/glhd_context.c +++ b/src/gallium/drivers/galahad/glhd_context.c @@ -112,6 +112,16 @@ galahad_draw_range_elements(struct pipe_context *_pipe, count); } +static void +galahad_draw_vbo(struct pipe_context *_pipe, + const struct pipe_draw_info *info) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->draw_vbo(pipe, info); +} + static struct pipe_query * galahad_create_query(struct pipe_context *_pipe, unsigned query_type) @@ -650,6 +660,41 @@ galahad_set_vertex_buffers(struct pipe_context *_pipe, num_buffers, buffers); } + +static void +galahad_set_index_buffer(struct pipe_context *_pipe, + const struct pipe_index_buffer *_ib) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_index_buffer unwrapped_ib, *ib = NULL; + + if (_ib->buffer) { + switch (_ib->index_size) { + case 1: + case 2: + case 4: + break; + default: + glhd_warn("index buffer %p has unrecognized index size %d", + _ib->buffer, _ib->index_size); + break; + } + } + else if (_ib->offset || _ib->index_size) { + glhd_warn("non-indexed state with index offset %d and index size %d", + _ib->offset, _ib->index_size); + } + + if (_ib) { + unwrapped_ib = *_ib; + unwrapped_ib.buffer = galahad_resource_unwrap(_ib->buffer); + ib = &unwrapped_ib; + } + + pipe->set_index_buffer(pipe, ib); +} + static void galahad_resource_copy_region(struct pipe_context *_pipe, struct pipe_resource *_dst, @@ -937,6 +982,7 @@ galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) glhd_pipe->base.draw_arrays = galahad_draw_arrays; glhd_pipe->base.draw_elements = galahad_draw_elements; glhd_pipe->base.draw_range_elements = galahad_draw_range_elements; + glhd_pipe->base.draw_vbo = galahad_draw_vbo; glhd_pipe->base.create_query = galahad_create_query; glhd_pipe->base.destroy_query = galahad_destroy_query; glhd_pipe->base.begin_query = galahad_begin_query; @@ -976,6 +1022,7 @@ galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) glhd_pipe->base.set_fragment_sampler_views = galahad_set_fragment_sampler_views; glhd_pipe->base.set_vertex_sampler_views = galahad_set_vertex_sampler_views; glhd_pipe->base.set_vertex_buffers = galahad_set_vertex_buffers; + glhd_pipe->base.set_index_buffer = galahad_set_index_buffer; glhd_pipe->base.resource_copy_region = galahad_resource_copy_region; glhd_pipe->base.clear = galahad_clear; glhd_pipe->base.clear_render_target = galahad_clear_render_target; diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 2af9bdac95..ca07b3e235 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -36,6 +36,7 @@ #include "pipe/p_defines.h" #include "util/u_inlines.h" #include "util/u_memory.h" +#include "util/u_draw_quad.h" #include "pipe/p_screen.h" @@ -45,16 +46,11 @@ static void -i915_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned min_index, - unsigned max_index, - unsigned prim, unsigned start, unsigned count) +i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { struct i915_context *i915 = i915_context(pipe); struct draw_context *draw = i915->draw; + void *mapped_indices = NULL; unsigned i; if (i915->dirty) @@ -71,16 +67,18 @@ i915_draw_range_elements(struct pipe_context *pipe, /* * Map index buffer, if present */ - if (indexBuffer) { - void *mapped_indexes = i915_buffer(indexBuffer)->data; - draw_set_mapped_element_buffer_range(draw, indexSize, indexBias, - min_index, - max_index, - mapped_indexes); - } else { - draw_set_mapped_element_buffer(draw, 0, 0, NULL); + if (info->indexed && i915->index_buffer.buffer) { + mapped_indices = i915_buffer(i915->index_buffer.buffer)->data; + mapped_indices += i915->index_buffer.offset; } + draw_set_mapped_element_buffer_range(draw, (mapped_indices) ? + i915->index_buffer.index_size : 0, + info->index_bias, + info->min_index, + info->max_index, + mapped_indices); + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, i915->current.constants[PIPE_SHADER_VERTEX], @@ -90,7 +88,7 @@ i915_draw_range_elements(struct pipe_context *pipe, /* * Do the drawing */ - draw_arrays(i915->draw, prim, start, count); + draw_arrays(i915->draw, info->mode, info->start, info->count); /* * unmap vertex/index buffers @@ -99,11 +97,48 @@ i915_draw_range_elements(struct pipe_context *pipe, draw_set_mapped_vertex_buffer(draw, i, NULL); } - if (indexBuffer) { + if (mapped_indices) { draw_set_mapped_element_buffer(draw, 0, 0, NULL); } } +static void +i915_draw_range_elements(struct pipe_context *pipe, + struct pipe_resource *indexBuffer, + unsigned indexSize, + int indexBias, + unsigned min_index, + unsigned max_index, + unsigned prim, unsigned start, unsigned count) +{ + struct i915_context *i915 = i915_context(pipe); + struct pipe_draw_info info; + struct pipe_index_buffer saved_ib, ib; + + util_draw_init_info(&info); + info.mode = prim; + info.start = start; + info.count = count; + info.index_bias = indexBias; + info.min_index = min_index; + info.max_index = max_index; + + if (indexBuffer) { + info.indexed = TRUE; + saved_ib = i915->index_buffer; + + ib.buffer = indexBuffer; + ib.offset = 0; + ib.index_size = indexSize; + pipe->set_index_buffer(pipe, &ib); + } + + i915_draw_vbo(pipe, &info); + + if (indexBuffer) + pipe->set_index_buffer(pipe, &saved_ib); +} + static void i915_draw_elements(struct pipe_context *pipe, struct pipe_resource *indexBuffer, @@ -171,6 +206,7 @@ i915_create_context(struct pipe_screen *screen, void *priv) i915->base.draw_arrays = i915_draw_arrays; i915->base.draw_elements = i915_draw_elements; i915->base.draw_range_elements = i915_draw_range_elements; + i915->base.draw_vbo = i915_draw_vbo; /* * Create drawing context and plug our rendering stage into it. diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index b210cb130d..3ae61d0ea7 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -221,6 +221,7 @@ struct i915_context struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_index_buffer index_buffer; unsigned dirty; diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index e767aa9f8f..385c3b2d2d 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -812,6 +812,19 @@ i915_delete_vertex_elements_state(struct pipe_context *pipe, void *velems) FREE( velems ); } +static void i915_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct i915_context *i915 = i915_context(pipe); + + if (ib) + memcpy(&i915->index_buffer, ib, sizeof(i915->index_buffer)); + else + memset(&i915->index_buffer, 0, sizeof(i915->index_buffer)); + + /* TODO make this more like a state */ +} + static void i915_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) @@ -860,4 +873,5 @@ i915_init_state_functions( struct i915_context *i915 ) i915->base.sampler_view_destroy = i915_sampler_view_destroy; i915->base.set_viewport_state = i915_set_viewport_state; i915->base.set_vertex_buffers = i915_set_vertex_buffers; + i915->base.set_index_buffer = i915_set_index_buffer; } diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 94c9c443f0..56d351f97d 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -576,6 +576,7 @@ struct brw_context */ struct pipe_resource *index_buffer; unsigned index_size; + unsigned index_offset; /* Updates are signalled by PIPE_NEW_INDEX_RANGE: */ diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index 4625c2048f..fa7d047e0b 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -29,6 +29,7 @@ #include "util/u_inlines.h" #include "util/u_prim.h" #include "util/u_upload_mgr.h" +#include "util/u_draw_quad.h" #include "brw_draw.h" #include "brw_defines.h" @@ -142,7 +143,7 @@ static int brw_emit_prim(struct brw_context *brw, */ static int try_draw_range_elements(struct brw_context *brw, - struct pipe_resource *index_buffer, + boolean indexed, unsigned hw_prim, unsigned start, unsigned count) { @@ -165,7 +166,7 @@ try_draw_range_elements(struct brw_context *brw, if (ret) return ret; - ret = brw_emit_prim(brw, start, count, index_buffer != NULL, hw_prim); + ret = brw_emit_prim(brw, start, count, indexed, hw_prim); if (ret) return ret; @@ -177,61 +178,86 @@ try_draw_range_elements(struct brw_context *brw, static void -brw_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *index_buffer, - unsigned index_size, int index_bias, - unsigned min_index, - unsigned max_index, - unsigned mode, unsigned start, unsigned count) +brw_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { struct brw_context *brw = brw_context(pipe); int ret; uint32_t hw_prim; - hw_prim = brw_set_prim(brw, mode); + hw_prim = brw_set_prim(brw, info->mode); if (BRW_DEBUG & DEBUG_PRIMS) debug_printf("PRIM: %s start %d count %d index_buffer %p\n", - u_prim_name(mode), start, count, (void *)index_buffer); - - assert(index_bias == 0); + u_prim_name(info->mode), info->start, info->count, + (void *) brw->curr.index_buffer); - /* Potentially trigger upload of new index buffer. - * - * XXX: do we need to go through state validation to achieve this? - * Could just call upload code directly. - */ - if (brw->curr.index_buffer != index_buffer || - brw->curr.index_size != index_size) { - pipe_resource_reference( &brw->curr.index_buffer, index_buffer ); - brw->curr.index_size = index_size; - brw->state.dirty.mesa |= PIPE_NEW_INDEX_BUFFER; - } + assert(info->index_bias == 0); - /* XXX: do we really care? + /* Potentially trigger upload of new index buffer range. + * XXX: do we really care? */ - if (brw->curr.min_index != min_index || - brw->curr.max_index != max_index) + if (brw->curr.min_index != info->min_index || + brw->curr.max_index != info->max_index) { - brw->curr.min_index = min_index; - brw->curr.max_index = max_index; + brw->curr.min_index = info->min_index; + brw->curr.max_index = info->max_index; brw->state.dirty.mesa |= PIPE_NEW_INDEX_RANGE; } /* Make a first attempt at drawing: */ - ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count ); + ret = try_draw_range_elements(brw, info->indexed, + hw_prim, info->start, info->count); /* Otherwise, flush and retry: */ if (ret != 0) { brw_context_flush( brw ); - ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count ); + ret = try_draw_range_elements(brw, info->indexed, + hw_prim, info->start, info->count); assert(ret == 0); } } +static void +brw_draw_range_elements(struct pipe_context *pipe, + struct pipe_resource *index_buffer, + unsigned index_size, int index_bias, + unsigned min_index, + unsigned max_index, + unsigned mode, unsigned start, unsigned count) +{ + struct brw_context *brw = brw_context(pipe); + struct pipe_draw_info info; + struct pipe_index_buffer saved_ib, ib; + + util_draw_init_info(&info); + info.mode = mode; + info.start = start; + info.count = count; + info.index_bias = index_bias; + info.min_index = min_index; + info.max_index = max_index; + + if (index_buffer) { + info.indexed = TRUE; + saved_ib.buffer = brw->curr.index_buffer; + saved_ib.offset = brw->curr.index_offset; + saved_ib.index_size = brw->curr.index_size; + + ib.buffer = index_buffer; + ib.offset = 0; + ib.index_size = index_size; + pipe->set_index_buffer(pipe, &ib); + } + + brw_draw_vbo(pipe, &info); + + if (index_buffer) + pipe->set_index_buffer(pipe, &saved_ib); +} + static void brw_draw_elements(struct pipe_context *pipe, struct pipe_resource *index_buffer, @@ -262,6 +288,7 @@ boolean brw_draw_init( struct brw_context *brw ) brw->base.draw_arrays = brw_draw_arrays; brw->base.draw_elements = brw_draw_elements; brw->base.draw_range_elements = brw_draw_range_elements; + brw->base.draw_vbo = brw_draw_vbo; /* Create helpers for uploading data in user buffers: */ diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index 337eee8cd9..ebeb1e146a 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -231,7 +231,7 @@ static int brw_prepare_indices(struct brw_context *brw) struct pipe_resource *upload_buf = NULL; struct brw_winsys_buffer *bo = NULL; GLuint offset; - GLuint index_size; + GLuint index_size, index_offset; GLuint ib_size; int ret; @@ -246,13 +246,14 @@ static int brw_prepare_indices(struct brw_context *brw) ib_size = index_buffer->width0; index_size = brw->curr.index_size; + index_offset = brw->curr.index_offset; /* Turn userbuffer into a proper hardware buffer? */ if (brw_buffer_is_user_buffer(index_buffer)) { ret = u_upload_buffer( brw->vb.upload_index, - 0, + index_offset, ib_size, index_buffer, &offset, @@ -269,7 +270,7 @@ static int brw_prepare_indices(struct brw_context *brw) else { bo = brw_buffer(index_buffer)->bo; ib_size = bo->size; - offset = 0; + offset = index_offset; } /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading the diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c index 4a120a51da..007239efc4 100644 --- a/src/gallium/drivers/i965/brw_pipe_vertex.c +++ b/src/gallium/drivers/i965/brw_pipe_vertex.c @@ -274,10 +274,41 @@ static void brw_set_vertex_buffers(struct pipe_context *pipe, } +static void brw_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct brw_context *brw = brw_context(pipe); + + if (ib) { + if (brw->curr.index_buffer == ib->buffer && + brw->curr.index_offset == ib->offset && + brw->curr.index_size == ib->index_size) + return; + + pipe_resource_reference(&brw->curr.index_buffer, ib->buffer); + brw->curr.index_offset = ib->offset; + brw->curr.index_size = ib->index_size; + } + else { + if (!brw->curr.index_buffer && + !brw->curr.index_offset && + !brw->curr.index_size) + return; + + pipe_resource_reference(&brw->curr.index_buffer, NULL); + brw->curr.index_offset = 0; + brw->curr.index_size = 0; + } + + brw->state.dirty.mesa |= PIPE_NEW_INDEX_BUFFER; +} + + void brw_pipe_vertex_init( struct brw_context *brw ) { brw->base.set_vertex_buffers = brw_set_vertex_buffers; + brw->base.set_index_buffer = brw_set_index_buffer; brw->base.create_vertex_elements_state = brw_create_vertex_elements_state; brw->base.bind_vertex_elements_state = brw_bind_vertex_elements_state; brw->base.delete_vertex_elements_state = brw_delete_vertex_elements_state; diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index 67be895b38..e10d3a1413 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -110,6 +110,16 @@ identity_draw_range_elements(struct pipe_context *_pipe, count); } +static void +identity_draw_vbo(struct pipe_context *_pipe, + const struct pipe_draw_info *info) +{ + struct identity_context *id_pipe = identity_context(_pipe); + struct pipe_context *pipe = id_pipe->pipe; + + pipe->draw_vbo(pipe, info); +} + static struct pipe_query * identity_create_query(struct pipe_context *_pipe, unsigned query_type) @@ -611,6 +621,24 @@ identity_set_vertex_buffers(struct pipe_context *_pipe, num_buffers, buffers); } + +static void +identity_set_index_buffer(struct pipe_context *_pipe, + const struct pipe_index_buffer *_ib) +{ + struct identity_context *id_pipe = identity_context(_pipe); + struct pipe_context *pipe = id_pipe->pipe; + struct pipe_index_buffer unwrapped_ib, *ib = NULL; + + if (_ib) { + unwrapped_ib = *_ib; + unwrapped_ib.buffer = identity_resource_unwrap(_ib->buffer); + ib = &unwrapped_ib; + } + + pipe->set_index_buffer(pipe, ib); +} + static void identity_resource_copy_region(struct pipe_context *_pipe, struct pipe_resource *_dst, @@ -892,6 +920,7 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.draw_arrays = identity_draw_arrays; id_pipe->base.draw_elements = identity_draw_elements; id_pipe->base.draw_range_elements = identity_draw_range_elements; + id_pipe->base.draw_vbo = identity_draw_vbo; id_pipe->base.create_query = identity_create_query; id_pipe->base.destroy_query = identity_destroy_query; id_pipe->base.begin_query = identity_begin_query; @@ -931,6 +960,7 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.set_fragment_sampler_views = identity_set_fragment_sampler_views; id_pipe->base.set_vertex_sampler_views = identity_set_vertex_sampler_views; id_pipe->base.set_vertex_buffers = identity_set_vertex_buffers; + id_pipe->base.set_index_buffer = identity_set_index_buffer; id_pipe->base.resource_copy_region = identity_resource_copy_region; id_pipe->base.clear = identity_clear; id_pipe->base.clear_render_target = identity_clear_render_target; diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index b2643ab33c..50f9091c3c 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -77,6 +77,7 @@ struct llvmpipe_context { struct pipe_sampler_view *vertex_sampler_views[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_index_buffer index_buffer; struct { struct llvmpipe_resource *buffer[PIPE_MAX_SO_BUFFERS]; int offset[PIPE_MAX_SO_BUFFERS]; diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index 625d0c8a8c..b6dbb9d288 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -34,6 +34,7 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" #include "util/u_prim.h" +#include "util/u_draw_quad.h" #include "lp_context.h" #include "lp_state.h" @@ -49,20 +50,11 @@ * the drawing to the 'draw' module. */ static void -llvmpipe_draw_range_elements_instanced(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount) +llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { struct llvmpipe_context *lp = llvmpipe_context(pipe); struct draw_context *draw = lp->draw; + void *mapped_indices = NULL; unsigned i; if (lp->dirty) @@ -77,27 +69,25 @@ llvmpipe_draw_range_elements_instanced(struct pipe_context *pipe, } /* Map index buffer, if present */ - if (indexBuffer) { - void *mapped_indexes = llvmpipe_resource_data(indexBuffer); - draw_set_mapped_element_buffer_range(draw, - indexSize, - indexBias, - minIndex, - maxIndex, - mapped_indexes); - } - else { - /* no index/element buffer */ - draw_set_mapped_element_buffer_range(draw, 0, 0, start, - start + count - 1, NULL); + if (info->indexed && lp->index_buffer.buffer) { + mapped_indices = llvmpipe_resource_data(lp->index_buffer.buffer); + mapped_indices += lp->index_buffer.offset; } + + draw_set_mapped_element_buffer_range(draw, (mapped_indices) ? + lp->index_buffer.index_size : 0, + info->index_bias, + info->min_index, + info->max_index, + mapped_indices); + llvmpipe_prepare_vertex_sampling(lp, lp->num_vertex_sampler_views, lp->vertex_sampler_views); /* draw! */ - draw_arrays_instanced(draw, mode, start, count, - startInstance, instanceCount); + draw_arrays_instanced(draw, info->mode, info->start, info->count, + info->start_instance, info->instance_count); /* * unmap vertex/index buffers @@ -105,7 +95,7 @@ llvmpipe_draw_range_elements_instanced(struct pipe_context *pipe, for (i = 0; i < lp->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); } - if (indexBuffer) { + if (mapped_indices) { draw_set_mapped_element_buffer(draw, 0, 0, NULL); } llvmpipe_cleanup_vertex_sampling(lp); @@ -119,6 +109,50 @@ llvmpipe_draw_range_elements_instanced(struct pipe_context *pipe, } +static void +llvmpipe_draw_range_elements_instanced(struct pipe_context *pipe, + struct pipe_resource *indexBuffer, + unsigned indexSize, + int indexBias, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + struct llvmpipe_context *lp = llvmpipe_context(pipe); + struct pipe_draw_info info; + struct pipe_index_buffer saved_ib, ib; + + util_draw_init_info(&info); + info.mode = mode; + info.start = start; + info.count = count; + info.start_instance = startInstance; + info.instance_count = instanceCount; + + info.index_bias = indexBias; + info.min_index = minIndex; + info.max_index = maxIndex; + + if (indexBuffer) { + info.indexed = TRUE; + saved_ib = lp->index_buffer; + + ib.buffer = indexBuffer; + ib.offset = 0; + ib.index_size = indexSize; + pipe->set_index_buffer(pipe, &ib); + } + + llvmpipe_draw_vbo(pipe, &info); + + if (indexBuffer) + pipe->set_index_buffer(pipe, &saved_ib); +} + static void llvmpipe_draw_arrays_instanced(struct pipe_context *pipe, unsigned mode, @@ -227,4 +261,6 @@ llvmpipe_init_draw_funcs(struct llvmpipe_context *llvmpipe) llvmpipe->pipe.draw_range_elements = llvmpipe_draw_range_elements; llvmpipe->pipe.draw_arrays_instanced = llvmpipe_draw_arrays_instanced; llvmpipe->pipe.draw_elements_instanced = llvmpipe_draw_elements_instanced; + + llvmpipe->pipe.draw_vbo = llvmpipe_draw_vbo; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c index 113f13db01..d86e66b4fb 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c @@ -89,6 +89,19 @@ llvmpipe_set_vertex_buffers(struct pipe_context *pipe, } +static void +llvmpipe_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + + if (ib) + memcpy(&llvmpipe->index_buffer, ib, sizeof(llvmpipe->index_buffer)); + else + memset(&llvmpipe->index_buffer, 0, sizeof(llvmpipe->index_buffer)); + + /* TODO make this more like a state */ +} void llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe) @@ -98,4 +111,5 @@ llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe) llvmpipe->pipe.delete_vertex_elements_state = llvmpipe_delete_vertex_elements_state; llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers; + llvmpipe->pipe.set_index_buffer = llvmpipe_set_index_buffer; } diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 915a925402..3fc39c1137 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -86,6 +86,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv) nv50->pipe.draw_arrays_instanced = nv50_draw_arrays_instanced; nv50->pipe.draw_elements = nv50_draw_elements; nv50->pipe.draw_elements_instanced = nv50_draw_elements_instanced; + nv50->pipe.draw_vbo = nv50_draw_vbo; nv50->pipe.clear = nv50_clear; nv50->pipe.flush = nv50_flush; diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 12c4a93a9b..a7c2b5d487 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -148,6 +148,7 @@ struct nv50_context { struct pipe_resource *constbuf[PIPE_SHADER_TYPES]; struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; unsigned vtxbuf_nr; + struct pipe_index_buffer idxbuf; struct nv50_vtxelt_stateobj *vtxelt; struct nv50_sampler_stateobj *sampler[3][PIPE_MAX_SAMPLERS]; unsigned sampler_nr[3]; @@ -197,6 +198,8 @@ extern void nv50_draw_elements_instanced(struct pipe_context *pipe, unsigned count, unsigned startInstance, unsigned instanceCount); +extern void nv50_draw_vbo(struct pipe_context *pipe, + const struct pipe_draw_info *info); extern void nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso); extern struct nouveau_stateobj *nv50_vbo_validate(struct nv50_context *nv50); diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 42c5a58318..ec0c0ff283 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -742,6 +742,20 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count, nv50->dirty |= NV50_NEW_ARRAYS; } +static void +nv50_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + if (ib) + memcpy(&nv50->idxbuf, ib, sizeof(nv50->idxbuf)); + else + memset(&nv50->idxbuf, 0, sizeof(nv50->idxbuf)); + + /* TODO make this more like a state */ +} + static void * nv50_vtxelts_state_create(struct pipe_context *pipe, unsigned num_elements, @@ -827,5 +841,6 @@ nv50_init_state_functions(struct nv50_context *nv50) nv50->pipe.bind_vertex_elements_state = nv50_vtxelts_state_bind; nv50->pipe.set_vertex_buffers = nv50_set_vertex_buffers; + nv50->pipe.set_index_buffer = nv50_set_index_buffer; } diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 864cb09352..11ffc182c2 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -473,6 +473,37 @@ nv50_draw_elements(struct pipe_context *pipe, mode, start, count, 0, 1); } +void +nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + if (info->indexed && nv50->idxbuf.buffer) { + unsigned offset; + + assert(nv50->idxbuf.offset % nv50->idxbuf.index_size == 0); + offset = nv50->idxbuf.offset / nv50->idxbuf.index_size; + + nv50_draw_elements_instanced(pipe, + nv50->idxbuf.buffer, + nv50->idxbuf.index_size, + info->index_bias, + info->mode, + info->start + offset, + info->count, + info->start_instance, + info->instance_count); + } + else { + nv50_draw_arrays_instanced(pipe, + info->mode, + info->start, + info->count, + info->start_instance, + info->instance_count); + } +} + static INLINE boolean nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib, struct nouveau_stateobj **pso, diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c index 6d2dc4d5bf..f30795f69a 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.c +++ b/src/gallium/drivers/nvfx/nvfx_context.c @@ -57,6 +57,7 @@ nvfx_create(struct pipe_screen *pscreen, void *priv) nvfx->pipe.destroy = nvfx_destroy; nvfx->pipe.draw_arrays = nvfx_draw_arrays; nvfx->pipe.draw_elements = nvfx_draw_elements; + nvfx->pipe.draw_vbo = nvfx_draw_vbo; nvfx->pipe.clear = nvfx_clear; nvfx->pipe.flush = nvfx_flush; diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index e48f9f3aa8..d6cd272eed 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -121,7 +121,8 @@ struct nvfx_context { struct pipe_stencil_ref stencil_ref; struct pipe_viewport_state viewport; struct pipe_framebuffer_state framebuffer; - struct pipe_resource *idxbuf; + struct pipe_index_buffer idxbuf; + struct pipe_resource *idxbuf_buffer; unsigned idxbuf_format; struct nvfx_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS]; @@ -242,6 +243,8 @@ extern void nvfx_draw_elements(struct pipe_context *pipe, unsigned indexSize, int indexBias, unsigned mode, unsigned start, unsigned count); +extern void nvfx_draw_vbo(struct pipe_context *pipe, + const struct pipe_draw_info *info); /* nvfx_vertprog.c */ extern boolean nvfx_vertprog_validate(struct nvfx_context *nvfx); diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c index 30322d46d9..cd58e439d7 100644 --- a/src/gallium/drivers/nvfx/nvfx_state.c +++ b/src/gallium/drivers/nvfx/nvfx_state.c @@ -555,6 +555,20 @@ nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count, nvfx->draw_dirty |= NVFX_NEW_ARRAYS; } +static void +nvfx_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + if (ib) + memcpy(&nvfx->idxbuf, ib, sizeof(nvfx->idxbuf)); + else + memset(&nvfx->idxbuf, 0, sizeof(nvfx->idxbuf)); + + /* TODO make this more like a state */ +} + static void * nvfx_vtxelts_state_create(struct pipe_context *pipe, unsigned num_elements, @@ -635,4 +649,5 @@ nvfx_init_state_functions(struct nvfx_context *nvfx) nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind; nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers; + nvfx->pipe.set_index_buffer = nvfx_set_index_buffer; } diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c index 520bae5aed..23a59b589b 100644 --- a/src/gallium/drivers/nvfx/nvfx_vbo.c +++ b/src/gallium/drivers/nvfx/nvfx_vbo.c @@ -85,7 +85,7 @@ nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_resource *ib, unsigned type; if (!ib) { - nvfx->idxbuf = NULL; + nvfx->idxbuf_buffer = NULL; nvfx->idxbuf_format = 0xdeadbeef; return FALSE; } @@ -104,10 +104,10 @@ nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_resource *ib, return FALSE; } - if (ib != nvfx->idxbuf || + if (ib != nvfx->idxbuf_buffer || type != nvfx->idxbuf_format) { nvfx->dirty |= NVFX_NEW_ARRAYS; - nvfx->idxbuf = ib; + nvfx->idxbuf_buffer = ib; nvfx->idxbuf_format = type; } @@ -491,11 +491,38 @@ nvfx_draw_elements(struct pipe_context *pipe, pipe->flush(pipe, 0, NULL); } +void +nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + if (info->indexed && nvfx->idxbuf.buffer) { + unsigned offset; + + assert(nvfx->idxbuf.offset % nvfx->idxbuf.index_size == 0); + offset = nvfx->idxbuf.offset / nvfx->idxbuf.index_size; + + nvfx_draw_elements(pipe, + nvfx->idxbuf.buffer, + nvfx->idxbuf.index_size, + info->index_bias, + info->mode, + info->start + offset, + info->count); + } + else { + nvfx_draw_arrays(pipe, + info->mode, + info->start, + info->count); + } +} + boolean nvfx_vbo_validate(struct nvfx_context *nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; - struct pipe_resource *ib = nvfx->idxbuf; + struct pipe_resource *ib = nvfx->idxbuf_buffer; unsigned ib_format = nvfx->idxbuf_format; int i; int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr); @@ -610,10 +637,10 @@ nvfx_vbo_relocate(struct nvfx_context *nvfx) } } - if(nvfx->idxbuf) + if(nvfx->idxbuf_buffer) { unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY; - struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf)->bo; + struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf_buffer)->bo; assert(nvfx->screen->index_buffer_reloc_flags); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index b4256c6278..7c77a46016 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -524,6 +524,8 @@ struct r300_context { struct r300_vertex_element_state *velems; bool any_user_vbs; + struct pipe_index_buffer index_buffer; + /* Vertex info for Draw. */ struct vertex_info vertex_info; diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index bae02135da..da96098cc4 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -33,6 +33,7 @@ #include "util/u_memory.h" #include "util/u_upload_mgr.h" #include "util/u_prim.h" +#include "util/u_draw_quad.h" #include "r300_cs.h" #include "r300_context.h" @@ -638,26 +639,56 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, } } +static void r300_draw_vbo(struct pipe_context* pipe, + const struct pipe_draw_info *info) +{ + struct r300_context* r300 = r300_context(pipe); + + if (info->indexed && r300->index_buffer.buffer) { + unsigned offset; + + assert(r300->index_buffer.offset % r300->index_buffer.index_size == 0); + offset = r300->index_buffer.offset / r300->index_buffer.index_size; + + r300_draw_range_elements(pipe, + r300->index_buffer.buffer, + r300->index_buffer.index_size, + info->index_bias, + info->min_index, + info->max_index, + info->mode, + info->start + offset, + info->count); + } + else { + r300_draw_arrays(pipe, + info->mode, + info->start, + info->count); + } +} + /**************************************************************************** * The rest of this file is for SW TCL rendering only. Please be polite and * * keep these functions separated so that they are easier to locate. ~C. * ***************************************************************************/ -/* SW TCL arrays, using Draw. */ -static void r300_swtcl_draw_arrays(struct pipe_context* pipe, - unsigned mode, - unsigned start, - unsigned count) +/* SW TCL elements, using Draw. */ +static void r300_swtcl_draw_vbo(struct pipe_context* pipe, + const struct pipe_draw_info *info) { struct r300_context* r300 = r300_context(pipe); struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS]; + struct pipe_transfer *ib_transfer; + unsigned count = info->count; int i; + void* indices = NULL; if (r300->skip_rendering) { return; } - if (!u_trim_pipe_prim(mode, &count)) { + if (!u_trim_pipe_prim(info->mode, &count)) { return; } @@ -667,13 +698,25 @@ static void r300_swtcl_draw_arrays(struct pipe_context* pipe, void* buf = pipe_buffer_map(pipe, r300->vertex_buffer[i].buffer, PIPE_TRANSFER_READ, - &vb_transfer[i]); + &vb_transfer[i]); draw_set_mapped_vertex_buffer(r300->draw, i, buf); } - draw_set_mapped_element_buffer(r300->draw, 0, 0, NULL); + if (info->indexed && r300->index_buffer.buffer) { + indices = pipe_buffer_map(pipe, r300->index_buffer.buffer, + PIPE_TRANSFER_READ, &ib_transfer); + if (indices) + indices += r300->index_buffer.offset; + } + + draw_set_mapped_element_buffer_range(r300->draw, (indices) ? + r300->index_buffer.index_size : 0, + info->index_bias, + info->min_index, + info->max_index, + indices); - draw_arrays(r300->draw, mode, start, count); + draw_arrays(r300->draw, info->mode, info->start, count); /* XXX Not sure whether this is the best fix. * It prevents CS from being rejected and weird assertion failures. */ @@ -681,9 +724,15 @@ static void r300_swtcl_draw_arrays(struct pipe_context* pipe, for (i = 0; i < r300->vertex_buffer_count; i++) { pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, - vb_transfer[i]); + vb_transfer[i]); draw_set_mapped_vertex_buffer(r300->draw, i, NULL); } + + if (ib_transfer) { + pipe_buffer_unmap(pipe, r300->index_buffer.buffer, ib_transfer); + draw_set_mapped_element_buffer_range(r300->draw, 0, 0, info->start, + info->start + count - 1, NULL); + } } /* SW TCL elements, using Draw. */ @@ -698,51 +747,40 @@ static void r300_swtcl_draw_range_elements(struct pipe_context* pipe, unsigned count) { struct r300_context* r300 = r300_context(pipe); - struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS]; - struct pipe_transfer *ib_transfer; - int i; - void* indices; - - if (r300->skip_rendering) { - return; - } - - if (!u_trim_pipe_prim(mode, &count)) { - return; + struct pipe_draw_info info; + struct pipe_index_buffer saved_ib, ib; + + util_draw_init_info(&info); + info.mode = mode; + info.start = start; + info.count = count; + info.index_bias = indexBias; + info.min_index = minIndex; + info.max_index = maxIndex; + + if (indexBuffer) { + info.indexed = TRUE; + + saved_ib = r300->index_buffer; + ib.buffer = indexBuffer; + ib.offset = 0; + ib.index_size = indexSize; + pipe->set_index_buffer(pipe, &ib); } - r300_update_derived_state(r300); - - for (i = 0; i < r300->vertex_buffer_count; i++) { - void* buf = pipe_buffer_map(pipe, - r300->vertex_buffer[i].buffer, - PIPE_TRANSFER_READ, - &vb_transfer[i]); - draw_set_mapped_vertex_buffer(r300->draw, i, buf); - } - - indices = pipe_buffer_map(pipe, indexBuffer, - PIPE_TRANSFER_READ, &ib_transfer); - draw_set_mapped_element_buffer_range(r300->draw, indexSize, indexBias, - minIndex, maxIndex, indices); - - draw_arrays(r300->draw, mode, start, count); - - /* XXX Not sure whether this is the best fix. - * It prevents CS from being rejected and weird assertion failures. */ - draw_flush(r300->draw); + r300_swtcl_draw_vbo(pipe, &info); - for (i = 0; i < r300->vertex_buffer_count; i++) { - pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, - vb_transfer[i]); - draw_set_mapped_vertex_buffer(r300->draw, i, NULL); - } + if (indexBuffer) + pipe->set_index_buffer(pipe, &saved_ib); +} - pipe_buffer_unmap(pipe, indexBuffer, - ib_transfer); - draw_set_mapped_element_buffer_range(r300->draw, 0, 0, - start, start + count - 1, - NULL); +static void r300_swtcl_draw_arrays(struct pipe_context* pipe, + unsigned mode, + unsigned start, + unsigned count) +{ + r300_swtcl_draw_range_elements(pipe, NULL, 0, 0, + start, start + count -1, mode, start, count); } /* Object for rendering using Draw. */ @@ -1148,9 +1186,11 @@ void r300_init_render_functions(struct r300_context *r300) if (r300->screen->caps.has_tcl) { r300->context.draw_arrays = r300_draw_arrays; r300->context.draw_range_elements = r300_draw_range_elements; + r300->context.draw_vbo = r300_draw_vbo; } else { r300->context.draw_arrays = r300_swtcl_draw_arrays; r300->context.draw_range_elements = r300_swtcl_draw_range_elements; + r300->context.draw_vbo = r300_swtcl_draw_vbo; } r300->context.resource_resolve = r300_resource_resolve; diff --git a/src/gallium/drivers/r300/r300_render_stencilref.c b/src/gallium/drivers/r300/r300_render_stencilref.c index 9a6b4e12ff..6d801cf159 100644 --- a/src/gallium/drivers/r300/r300_render_stencilref.c +++ b/src/gallium/drivers/r300/r300_render_stencilref.c @@ -42,6 +42,9 @@ struct r300_stencilref_context { unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex, unsigned mode, unsigned start, unsigned count); + void (*draw_vbo)(struct pipe_context *pipe, + const struct pipe_draw_info *info); + uint32_t rs_cull_mode; uint32_t zb_stencilrefmask; ubyte ref_value_front; @@ -144,6 +147,23 @@ static void r300_stencilref_draw_range_elements( } } +static void r300_stencilref_draw_vbo(struct pipe_context *pipe, + const struct pipe_draw_info *info) +{ + struct r300_context *r300 = r300_context(pipe); + struct r300_stencilref_context *sr = r300->stencilref_fallback; + + if (!r300_stencilref_needed(r300)) { + sr->draw_vbo(pipe, info); + } else { + r300_stencilref_begin(r300); + sr->draw_vbo(pipe, info); + r300_stencilref_switch_side(r300); + sr->draw_vbo(pipe, info); + r300_stencilref_end(r300); + } +} + void r300_plug_in_stencil_ref_fallback(struct r300_context *r300) { r300->stencilref_fallback = CALLOC_STRUCT(r300_stencilref_context); @@ -151,8 +171,10 @@ void r300_plug_in_stencil_ref_fallback(struct r300_context *r300) /* Save original draw functions. */ r300->stencilref_fallback->draw_arrays = r300->context.draw_arrays; r300->stencilref_fallback->draw_range_elements = r300->context.draw_range_elements; + r300->stencilref_fallback->draw_vbo = r300->context.draw_vbo; /* Override the draw functions. */ r300->context.draw_arrays = r300_stencilref_draw_arrays; r300->context.draw_range_elements = r300_stencilref_draw_range_elements; + r300->context.draw_vbo = r300_stencilref_draw_vbo; } diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 3e221f2e02..bccd7d7859 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1505,6 +1505,23 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, r300->vertex_buffer_count = count; } +static void r300_set_index_buffer(struct pipe_context* pipe, + const struct pipe_index_buffer *ib) +{ + struct r300_context* r300 = r300_context(pipe); + + if (ib) { + pipe_resource_reference(&r300->index_buffer.buffer, ib->buffer); + memcpy(&r300->index_buffer, ib, sizeof(r300->index_buffer)); + } + else { + pipe_resource_reference(&r300->index_buffer.buffer, NULL); + memset(&r300->index_buffer, 0, sizeof(r300->index_buffer)); + } + + /* TODO make this more like a state */ +} + /* Initialize the PSC tables. */ static void r300_vertex_psc(struct r300_vertex_element_state *velems) { @@ -1852,6 +1869,7 @@ void r300_init_state_functions(struct r300_context* r300) r300->context.set_viewport_state = r300_set_viewport_state; r300->context.set_vertex_buffers = r300_set_vertex_buffers; + r300->context.set_index_buffer = r300_set_index_buffer; r300->context.create_vertex_elements_state = r300_create_vertex_elements_state; r300->context.bind_vertex_elements_state = r300_bind_vertex_elements_state; diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index 4c7b67ea52..2c2bd4672b 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -316,6 +316,7 @@ struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) rctx->context.draw_arrays = r600_draw_arrays; rctx->context.draw_elements = r600_draw_elements; rctx->context.draw_range_elements = r600_draw_range_elements; + rctx->context.draw_vbo = r600_draw_vbo; rctx->context.flush = r600_flush; /* Easy accessing of screen/winsys. */ diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index 1f03b202ee..9427c19d05 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -157,6 +157,7 @@ struct r600_context { struct r600_context_state *vs_sampler_view[PIPE_MAX_ATTRIBS]; struct r600_vertex_element *vertex_elements; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_index_buffer index_buffer; }; #if 0 @@ -201,6 +202,8 @@ void r600_draw_range_elements(struct pipe_context *ctx, unsigned index_size, int index_bias, unsigned min_index, unsigned max_index, unsigned mode, unsigned start, unsigned count); +void r600_draw_vbo(struct pipe_context *ctx, + const struct pipe_draw_info *info); void r600_init_blit_functions(struct r600_context *rctx); void r600_init_state_functions(struct r600_context *rctx); diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c index b248beaf8c..eeaa677edb 100644 --- a/src/gallium/drivers/r600/r600_draw.c +++ b/src/gallium/drivers/r600/r600_draw.c @@ -225,3 +225,30 @@ void r600_draw_arrays(struct pipe_context *ctx, unsigned mode, draw.index_buffer = NULL; r600_draw_common(&draw); } + +void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_draw draw; + + assert(info->index_bias == 0); + + draw.ctx = ctx; + draw.mode = info->mode; + draw.start = info->start; + draw.count = info->count; + if (info->indexed && rctx->index_buffer.buffer) { + draw.index_size = rctx->index_buffer.index_size; + draw.index_buffer = rctx->index_buffer.buffer; + + assert(rctx->index_buffer.offset % + rctx->index_buffer.index_size == 0); + draw.start += rctx->index_buffer.offset / + rctx->index_buffer.index_size; + } + else { + draw.index_size = 0; + draw.index_buffer = NULL; + } + r600_draw_common(&draw); +} diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index ff5df855c6..57879e8d8b 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -404,6 +404,23 @@ static void r600_set_vertex_buffers(struct pipe_context *ctx, rctx->nvertex_buffer = count; } +static void r600_set_index_buffer(struct pipe_context *ctx, + const struct pipe_index_buffer *ib) +{ + struct r600_context *rctx = r600_context(ctx); + + if (ib) { + pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer); + memcpy(&rctx->index_buffer, ib, sizeof(rctx->index_buffer)); + } + else { + pipe_resource_reference(&rctx->index_buffer.buffer, NULL); + memset(&rctx->index_buffer, 0, sizeof(rctx->index_buffer)); + } + + /* TODO make this more like a state */ +} + static void r600_set_viewport_state(struct pipe_context *ctx, const struct pipe_viewport_state *state) { @@ -449,6 +466,7 @@ void r600_init_state_functions(struct r600_context *rctx) rctx->context.set_scissor_state = r600_set_scissor_state; rctx->context.set_stencil_ref = r600_set_stencil_ref; rctx->context.set_vertex_buffers = r600_set_vertex_buffers; + rctx->context.set_index_buffer = r600_set_index_buffer; rctx->context.set_vertex_sampler_views = r600_set_vs_sampler_view; rctx->context.set_viewport_state = r600_set_viewport_state; rctx->context.sampler_view_destroy = r600_sampler_view_destroy; diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c index e0dd5cf8c2..c748073b2a 100644 --- a/src/gallium/drivers/rbug/rbug_context.c +++ b/src/gallium/drivers/rbug/rbug_context.c @@ -185,6 +185,21 @@ rbug_draw_range_elements(struct pipe_context *_pipe, pipe_mutex_unlock(rb_pipe->draw_mutex); } +static void +rbug_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info) +{ + struct rbug_context *rb_pipe = rbug_context(_pipe); + struct pipe_context *pipe = rb_pipe->pipe; + + pipe_mutex_lock(rb_pipe->draw_mutex); + rbug_draw_block_locked(rb_pipe, RBUG_BLOCK_BEFORE); + + pipe->draw_vbo(pipe, info); + + rbug_draw_block_locked(rb_pipe, RBUG_BLOCK_AFTER); + pipe_mutex_unlock(rb_pipe->draw_mutex); +} + static struct pipe_query * rbug_create_query(struct pipe_context *_pipe, unsigned query_type) @@ -744,6 +759,23 @@ rbug_set_vertex_buffers(struct pipe_context *_pipe, buffers); } +static void +rbug_set_index_buffer(struct pipe_context *_pipe, + const struct pipe_index_buffer *_ib) +{ + struct rbug_context *rb_pipe = rbug_context(_pipe); + struct pipe_context *pipe = rb_pipe->pipe; + struct pipe_index_buffer unwrapped_ib, *ib = NULL; + + if (_ib) { + unwrapped_ib = *_ib; + unwrapped_ib.buffer = rbug_resource_unwrap(_ib->buffer); + ib = &unwrapped_ib; + } + + pipe->set_index_buffer(pipe, ib); +} + static void rbug_set_sample_mask(struct pipe_context *_pipe, unsigned sample_mask) @@ -1043,6 +1075,7 @@ rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) rb_pipe->base.draw_arrays = rbug_draw_arrays; rb_pipe->base.draw_elements = rbug_draw_elements; rb_pipe->base.draw_range_elements = rbug_draw_range_elements; + rb_pipe->base.draw_vbo = rbug_draw_vbo; rb_pipe->base.create_query = rbug_create_query; rb_pipe->base.destroy_query = rbug_destroy_query; rb_pipe->base.begin_query = rbug_begin_query; @@ -1084,6 +1117,7 @@ rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) rb_pipe->base.set_fragment_sampler_views = rbug_set_fragment_sampler_views; rb_pipe->base.set_vertex_sampler_views = rbug_set_vertex_sampler_views; rb_pipe->base.set_vertex_buffers = rbug_set_vertex_buffers; + rb_pipe->base.set_index_buffer = rbug_set_index_buffer; rb_pipe->base.set_sample_mask = rbug_set_sample_mask; rb_pipe->base.resource_copy_region = rbug_resource_copy_region; rb_pipe->base.clear = rbug_clear; diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 12ef98aac7..fa1fae6f00 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -282,12 +282,14 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->pipe.set_viewport_state = softpipe_set_viewport_state; softpipe->pipe.set_stream_output_buffers = softpipe_set_stream_output_buffers; softpipe->pipe.set_vertex_buffers = softpipe_set_vertex_buffers; + softpipe->pipe.set_index_buffer = softpipe_set_index_buffer; softpipe->pipe.draw_arrays = softpipe_draw_arrays; softpipe->pipe.draw_elements = softpipe_draw_elements; softpipe->pipe.draw_range_elements = softpipe_draw_range_elements; softpipe->pipe.draw_arrays_instanced = softpipe_draw_arrays_instanced; softpipe->pipe.draw_elements_instanced = softpipe_draw_elements_instanced; + softpipe->pipe.draw_vbo = softpipe_draw_vbo; softpipe->pipe.draw_stream_output = softpipe_draw_stream_output; softpipe->pipe.clear = softpipe_clear; diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 53115a827d..c5f53cfa61 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -82,6 +82,7 @@ struct softpipe_context { struct pipe_sampler_view *geometry_sampler_views[PIPE_MAX_GEOMETRY_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_index_buffer index_buffer; struct { struct softpipe_resource *buffer[PIPE_MAX_SO_BUFFERS]; int offset[PIPE_MAX_SO_BUFFERS]; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 9e727c9381..2855f55a0e 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -35,6 +35,7 @@ #include "pipe/p_context.h" #include "util/u_inlines.h" #include "util/u_prim.h" +#include "util/u_draw_quad.h" #include "sp_context.h" #include "sp_query.h" @@ -111,27 +112,19 @@ softpipe_draw_stream_output(struct pipe_context *pipe, unsigned mode) * When the min/max element indexes aren't known, minIndex should be 0 * and maxIndex should be ~0. */ -static void -softpipe_draw_range_elements_instanced(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount) +void +softpipe_draw_vbo(struct pipe_context *pipe, + const struct pipe_draw_info *info) { struct softpipe_context *sp = softpipe_context(pipe); struct draw_context *draw = sp->draw; + void *mapped_indices = NULL; unsigned i; if (!softpipe_check_render_cond(sp)) return; - sp->reduced_api_prim = u_reduced_prim(mode); + sp->reduced_api_prim = u_reduced_prim(info->mode); if (sp->dirty) { softpipe_update_derived(sp); @@ -146,31 +139,27 @@ softpipe_draw_range_elements_instanced(struct pipe_context *pipe, } /* Map index buffer, if present */ - if (indexBuffer) { - void *mapped_indexes = softpipe_resource(indexBuffer)->data; - draw_set_mapped_element_buffer_range(draw, - indexSize, - indexBias, - minIndex, - maxIndex, - mapped_indexes); - } else { - /* no index/element buffer */ - draw_set_mapped_element_buffer_range(draw, - 0, 0, - start, - start + count - 1, - NULL); + if (info->indexed && sp->index_buffer.buffer) { + mapped_indices = softpipe_resource(sp->index_buffer.buffer)->data; + mapped_indices += sp->index_buffer.offset; } + draw_set_mapped_element_buffer_range(draw, (mapped_indices) ? + sp->index_buffer.index_size : 0, + info->index_bias, + info->min_index, + info->max_index, + mapped_indices); + /* draw! */ - draw_arrays_instanced(draw, mode, start, count, startInstance, instanceCount); + draw_arrays_instanced(draw, info->mode, info->start, info->count, + info->start_instance, info->instance_count); /* unmap vertex/index buffers - will cause draw module to flush */ for (i = 0; i < sp->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); } - if (indexBuffer) { + if (mapped_indices) { draw_set_mapped_element_buffer(draw, 0, 0, NULL); } @@ -185,6 +174,49 @@ softpipe_draw_range_elements_instanced(struct pipe_context *pipe, sp->dirty_render_cache = TRUE; } +static void +softpipe_draw_range_elements_instanced(struct pipe_context *pipe, + struct pipe_resource *indexBuffer, + unsigned indexSize, + int indexBias, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + struct softpipe_context *sp = softpipe_context(pipe); + struct pipe_draw_info info; + struct pipe_index_buffer saved_ib, ib; + + util_draw_init_info(&info); + info.mode = mode; + info.start = start; + info.count = count; + info.start_instance = startInstance; + info.instance_count = instanceCount; + info.index_bias = indexBias; + info.min_index = minIndex; + info.max_index = maxIndex; + + if (indexBuffer) { + info.indexed = TRUE; + + saved_ib = sp->index_buffer; + ib.buffer = indexBuffer; + ib.offset = 0; + ib.index_size = indexSize; + pipe->set_index_buffer(pipe, &ib); + } + + softpipe_draw_vbo(pipe, &info); + + if (indexBuffer) + pipe->set_index_buffer(pipe, &saved_ib); +} + void softpipe_draw_range_elements(struct pipe_context *pipe, diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 7d6b86dce0..f04b0a5d31 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -221,6 +221,9 @@ void softpipe_set_vertex_buffers(struct pipe_context *, unsigned count, const struct pipe_vertex_buffer *); +void softpipe_set_index_buffer(struct pipe_context *, + const struct pipe_index_buffer *); + void softpipe_update_derived( struct softpipe_context *softpipe ); @@ -260,6 +263,10 @@ softpipe_draw_elements_instanced(struct pipe_context *pipe, unsigned startInstance, unsigned instanceCount); +void +softpipe_draw_vbo(struct pipe_context *pipe, + const struct pipe_draw_info *info); + void softpipe_draw_stream_output(struct pipe_context *pipe, unsigned mode); void diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c index 462f4d2655..880a7c7cd2 100644 --- a/src/gallium/drivers/softpipe/sp_state_vertex.c +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -88,3 +88,17 @@ softpipe_set_vertex_buffers(struct pipe_context *pipe, draw_set_vertex_buffers(softpipe->draw, count, buffers); } + +void +softpipe_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + if (ib) + memcpy(&softpipe->index_buffer, ib, sizeof(softpipe->index_buffer)); + else + memset(&softpipe->index_buffer, 0, sizeof(softpipe->index_buffer)); + + /* TODO make this more like a state */ +} diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index 9a46de643f..67a7614c8a 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -190,6 +190,7 @@ struct svga_state struct svga_vertex_shader *vs; struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS]; + struct pipe_index_buffer ib; struct pipe_resource *cb[PIPE_SHADER_TYPES]; struct pipe_framebuffer_state framebuffer; diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c index 58e930d983..fceaa83d70 100644 --- a/src/gallium/drivers/svga/svga_pipe_draw.c +++ b/src/gallium/drivers/svga/svga_pipe_draw.c @@ -248,10 +248,34 @@ svga_draw_arrays( struct pipe_context *pipe, start, count); } +static void +svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) +{ + struct svga_context *svga = svga_context(pipe); + + if (info->indexed && svga->curr.ib.buffer) { + unsigned offset; + + assert(svga->curr.ib.offset % svga->curr.ib.index_size == 0); + offset = svga->curr.ib.offset / svga->curr.ib.index_size; + + svga_draw_range_elements(pipe, svga->curr.ib.buffer, + svga->curr.ib.index_size, info->index_bias, + info->min_index, info->max_index, + info->mode, info->start + offset, info->count); + } + else { + svga_draw_range_elements(pipe, NULL, 0, 0, + info->min_index, info->max_index, + info->mode, info->start, info->count); + } +} + void svga_init_draw_functions( struct svga_context *svga ) { svga->pipe.draw_arrays = svga_draw_arrays; svga->pipe.draw_elements = svga_draw_elements; svga->pipe.draw_range_elements = svga_draw_range_elements; + svga->pipe.draw_vbo = svga_draw_vbo; } diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c index 23808ad08e..86c79459f3 100644 --- a/src/gallium/drivers/svga/svga_pipe_vertex.c +++ b/src/gallium/drivers/svga/svga_pipe_vertex.c @@ -66,6 +66,24 @@ static void svga_set_vertex_buffers(struct pipe_context *pipe, } +static void svga_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct svga_context *svga = svga_context(pipe); + + if (ib) { + pipe_resource_reference(&svga->curr.ib.buffer, ib->buffer); + memcpy(&svga->curr.ib, ib, sizeof(svga->curr.ib)); + } + else { + pipe_resource_reference(&svga->curr.ib.buffer, NULL); + memset(&svga->curr.ib, 0, sizeof(svga->curr.ib)); + } + + /* TODO make this more like a state */ +} + + static void * svga_create_vertex_elements_state(struct pipe_context *pipe, unsigned count, @@ -109,6 +127,7 @@ void svga_cleanup_vertex_state( struct svga_context *svga ) void svga_init_vertex_functions( struct svga_context *svga ) { svga->pipe.set_vertex_buffers = svga_set_vertex_buffers; + svga->pipe.set_index_buffer = svga_set_index_buffer; svga->pipe.create_vertex_elements_state = svga_create_vertex_elements_state; svga->pipe.bind_vertex_elements_state = svga_bind_vertex_elements_state; svga->pipe.delete_vertex_elements_state = svga_delete_vertex_elements_state; diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 55dd6cf883..91c9bf0999 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -167,6 +167,32 @@ trace_context_draw_range_elements(struct pipe_context *_pipe, } +static INLINE void +trace_context_draw_vbo(struct pipe_context *_pipe, + const struct pipe_draw_info *info) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "draw_vbo"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(bool, info->indexed); + trace_dump_arg(uint, info->mode); + trace_dump_arg(uint, info->start); + trace_dump_arg(uint, info->count); + trace_dump_arg(uint, info->start_instance); + trace_dump_arg(uint, info->instance_count); + trace_dump_arg(int, info->index_bias); + trace_dump_arg(uint, info->min_index); + trace_dump_arg(uint, info->max_index); + + pipe->draw_vbo(pipe, info); + + trace_dump_call_end(); +} + + static INLINE struct pipe_query * trace_context_create_query(struct pipe_context *_pipe, unsigned query_type) @@ -1044,6 +1070,30 @@ trace_context_set_vertex_buffers(struct pipe_context *_pipe, } +static INLINE void +trace_context_set_index_buffer(struct pipe_context *_pipe, + const struct pipe_index_buffer *_ib) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + struct pipe_index_buffer unwrapped_ib, *ib = NULL; + + if (_ib) { + unwrapped_ib = *_ib; + unwrapped_ib.buffer = trace_resource_unwrap(tr_ctx, _ib->buffer); + ib = &unwrapped_ib; + } + + trace_dump_call_begin("pipe_context", "set_index_buffer"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(index_buffer, ib); + + pipe->set_index_buffer(pipe, ib); + + trace_dump_call_end(); +} + static INLINE void trace_context_resource_copy_region(struct pipe_context *_pipe, struct pipe_resource *dst, @@ -1436,6 +1486,7 @@ trace_context_create(struct trace_screen *tr_scr, tr_ctx->base.draw_arrays = trace_context_draw_arrays; tr_ctx->base.draw_elements = trace_context_draw_elements; tr_ctx->base.draw_range_elements = trace_context_draw_range_elements; + tr_ctx->base.draw_vbo = trace_context_draw_vbo; tr_ctx->base.create_query = trace_context_create_query; tr_ctx->base.destroy_query = trace_context_destroy_query; tr_ctx->base.begin_query = trace_context_begin_query; @@ -1477,6 +1528,7 @@ trace_context_create(struct trace_screen *tr_scr, tr_ctx->base.create_sampler_view = trace_create_sampler_view; tr_ctx->base.sampler_view_destroy = trace_sampler_view_destroy; tr_ctx->base.set_vertex_buffers = trace_context_set_vertex_buffers; + tr_ctx->base.set_index_buffer = trace_context_set_index_buffer; tr_ctx->base.resource_copy_region = trace_context_resource_copy_region; tr_ctx->base.clear = trace_context_clear; tr_ctx->base.clear_render_target = trace_context_clear_render_target; diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index 1727c2a020..bd9a9bfaf1 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -533,6 +533,26 @@ void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state) } +void trace_dump_index_buffer(const struct pipe_index_buffer *state) +{ + if (!trace_dumping_enabled_locked()) + return; + + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_index_buffer"); + + trace_dump_member(uint, state, index_size); + trace_dump_member(uint, state, offset); + trace_dump_member(resource_ptr, state, buffer); + + trace_dump_struct_end(); +} + + void trace_dump_vertex_element(const struct pipe_vertex_element *state) { if (!trace_dumping_enabled_locked()) diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h index e614e8355e..2e70f4e1c7 100644 --- a/src/gallium/drivers/trace/tr_dump_state.h +++ b/src/gallium/drivers/trace/tr_dump_state.h @@ -75,6 +75,8 @@ void trace_dump_transfer(const struct pipe_transfer *state); void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state); +void trace_dump_index_buffer(const struct pipe_index_buffer *state); + void trace_dump_vertex_element(const struct pipe_vertex_element *state); -- cgit v1.2.3 From a57f84251926045a3358822d0fd92ca95a4f0fde Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Sat, 17 Jul 2010 01:10:46 +0800 Subject: gallium: Keep only pipe_context::draw_vbo. That is, remove pipe_context::draw_arrays, pipe_context::draw_elements, pipe_context::draw_arrays_instanced, pipe_context::draw_elements_instanced, pipe_context::draw_range_elements. --- src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 63 --------- src/gallium/drivers/failover/fo_context.c | 44 ------- src/gallium/drivers/galahad/glhd_context.c | 68 ---------- src/gallium/drivers/i915/i915_context.c | 62 --------- src/gallium/drivers/i965/brw_draw.c | 64 --------- src/gallium/drivers/identity/id_context.c | 68 ---------- src/gallium/drivers/llvmpipe/lp_draw_arrays.c | 151 ---------------------- src/gallium/drivers/nv50/nv50_context.c | 4 - src/gallium/drivers/nv50/nv50_context.h | 18 --- src/gallium/drivers/nv50/nv50_vbo.c | 21 +-- src/gallium/drivers/nvfx/nvfx_context.c | 2 - src/gallium/drivers/nvfx/nvfx_context.h | 7 - src/gallium/drivers/nvfx/nvfx_vbo.c | 4 +- src/gallium/drivers/r300/r300_render.c | 69 ---------- src/gallium/drivers/r300/r300_render_stencilref.c | 55 +------- src/gallium/drivers/r600/r600_context.c | 3 - src/gallium/drivers/r600/r600_context.h | 11 -- src/gallium/drivers/r600/r600_draw.c | 50 ------- src/gallium/drivers/rbug/rbug_context.c | 86 ------------ src/gallium/drivers/softpipe/sp_context.c | 5 - src/gallium/drivers/softpipe/sp_draw_arrays.c | 150 --------------------- src/gallium/drivers/softpipe/sp_state.h | 35 ----- src/gallium/drivers/svga/svga_pipe_draw.c | 25 ---- src/gallium/drivers/trace/tr_context.c | 88 ------------- src/gallium/include/pipe/p_context.h | 41 ------ 25 files changed, 6 insertions(+), 1188 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index e06226fbfe..4adef5b8c0 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -34,7 +34,6 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" #include "util/u_inlines.h" -#include "util/u_draw_quad.h" #include "cell_context.h" #include "cell_draw_arrays.h" @@ -113,71 +112,9 @@ cell_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) } -static void -cell_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned min_index, - unsigned max_index, - unsigned mode, unsigned start, unsigned count) -{ - struct cell_context *cell = cell_context(pipe); - struct pipe_draw_info info; - struct pipe_index_buffer saved_ib, ib; - - util_draw_init_info(&info); - info.mode = mode; - info.start = start; - info.count = count; - info.index_bias = indexBias; - info.min_index = min_index; - info.max_index = max_index; - - if (indexBuffer) { - info.indexed = TRUE; - saved_ib = cell->index_buffer; - - ib.buffer = indexBuffer; - ib.offset = 0; - ib.index_size = indexSize; - pipe->set_index_buffer(pipe, &ib); - } - - cell_draw_vbo(pipe, &info); - - if (indexBuffer) - pipe->set_index_buffer(pipe, &saved_ib); -} - - -static void -cell_draw_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned mode, unsigned start, unsigned count) -{ - cell_draw_range_elements( pipe, indexBuffer, - indexSize, indexBias, - 0, 0xffffffff, - mode, start, count ); -} - - -static void -cell_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count) -{ - cell_draw_elements(pipe, NULL, 0, 0, mode, start, count); -} - - void cell_init_draw_functions(struct cell_context *cell) { - cell->pipe.draw_arrays = cell_draw_arrays; - cell->pipe.draw_elements = cell_draw_elements; - cell->pipe.draw_range_elements = cell_draw_range_elements; cell->pipe.draw_vbo = cell_draw_vbo; } diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index 1048d58313..761a0fce72 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -28,7 +28,6 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" -#include "util/u_draw_quad.h" #include "pipe/p_context.h" #include "fo_context.h" @@ -88,47 +87,6 @@ static void failover_draw_vbo( struct pipe_context *pipe, } } - -static void failover_draw_elements( struct pipe_context *pipe, - struct pipe_resource *indexResource, - unsigned indexSize, - int indexBias, - unsigned prim, - unsigned start, - unsigned count) -{ - struct failover_context *failover = failover_context( pipe ); - struct pipe_draw_info info; - struct pipe_index_buffer saved_ib, ib; - - util_draw_init_info(&info); - info.mode = prim; - info.start = start; - info.count = count; - - if (indexResource) { - info.indexed = TRUE; - saved_ib = failover->index_buffer; - - ib.buffer = indexResource; - ib.offset = 0; - ib.index_size = indexSize; - pipe->set_index_buffer(pipe, &ib); - } - - failover_draw_vbo(pipe, &info); - - if (indexResource) - pipe->set_index_buffer(pipe, &saved_ib); -} - - -static void failover_draw_arrays( struct pipe_context *pipe, - unsigned prim, unsigned start, unsigned count) -{ - failover_draw_elements(pipe, NULL, 0, 0, prim, start, count); -} - static unsigned int failover_is_resource_referenced( struct pipe_context *_pipe, struct pipe_resource *resource, @@ -161,8 +119,6 @@ struct pipe_context *failover_create( struct pipe_context *hw, failover->pipe.get_paramf = hw->get_paramf; #endif - failover->pipe.draw_arrays = failover_draw_arrays; - failover->pipe.draw_elements = failover_draw_elements; failover->pipe.draw_vbo = failover_draw_vbo; failover->pipe.clear = hw->clear; failover->pipe.clear_render_target = hw->clear_render_target; diff --git a/src/gallium/drivers/galahad/glhd_context.c b/src/gallium/drivers/galahad/glhd_context.c index 6473f2d499..fe14a287ef 100644 --- a/src/gallium/drivers/galahad/glhd_context.c +++ b/src/gallium/drivers/galahad/glhd_context.c @@ -47,71 +47,6 @@ galahad_destroy(struct pipe_context *_pipe) FREE(glhd_pipe); } -static void -galahad_draw_arrays(struct pipe_context *_pipe, - unsigned prim, - unsigned start, - unsigned count) -{ - struct galahad_context *glhd_pipe = galahad_context(_pipe); - struct pipe_context *pipe = glhd_pipe->pipe; - - pipe->draw_arrays(pipe, - prim, - start, - count); -} - -static void -galahad_draw_elements(struct pipe_context *_pipe, - struct pipe_resource *_indexResource, - unsigned indexSize, - int indexBias, - unsigned prim, - unsigned start, - unsigned count) -{ - struct galahad_context *glhd_pipe = galahad_context(_pipe); - struct galahad_resource *glhd_resource = galahad_resource(_indexResource); - struct pipe_context *pipe = glhd_pipe->pipe; - struct pipe_resource *indexResource = glhd_resource->resource; - - pipe->draw_elements(pipe, - indexResource, - indexSize, - indexBias, - prim, - start, - count); -} - -static void -galahad_draw_range_elements(struct pipe_context *_pipe, - struct pipe_resource *_indexResource, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) -{ - struct galahad_context *glhd_pipe = galahad_context(_pipe); - struct galahad_resource *glhd_resource = galahad_resource(_indexResource); - struct pipe_context *pipe = glhd_pipe->pipe; - struct pipe_resource *indexResource = glhd_resource->resource; - - pipe->draw_range_elements(pipe, - indexResource, - indexSize, - indexBias, - minIndex, - maxIndex, - mode, - start, - count); -} - static void galahad_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info) @@ -979,9 +914,6 @@ galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) glhd_pipe->base.draw = NULL; glhd_pipe->base.destroy = galahad_destroy; - glhd_pipe->base.draw_arrays = galahad_draw_arrays; - glhd_pipe->base.draw_elements = galahad_draw_elements; - glhd_pipe->base.draw_range_elements = galahad_draw_range_elements; glhd_pipe->base.draw_vbo = galahad_draw_vbo; glhd_pipe->base.create_query = galahad_create_query; glhd_pipe->base.destroy_query = galahad_destroy_query; diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index ca07b3e235..496efc99da 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -36,7 +36,6 @@ #include "pipe/p_defines.h" #include "util/u_inlines.h" #include "util/u_memory.h" -#include "util/u_draw_quad.h" #include "pipe/p_screen.h" @@ -102,64 +101,6 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) } } -static void -i915_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned min_index, - unsigned max_index, - unsigned prim, unsigned start, unsigned count) -{ - struct i915_context *i915 = i915_context(pipe); - struct pipe_draw_info info; - struct pipe_index_buffer saved_ib, ib; - - util_draw_init_info(&info); - info.mode = prim; - info.start = start; - info.count = count; - info.index_bias = indexBias; - info.min_index = min_index; - info.max_index = max_index; - - if (indexBuffer) { - info.indexed = TRUE; - saved_ib = i915->index_buffer; - - ib.buffer = indexBuffer; - ib.offset = 0; - ib.index_size = indexSize; - pipe->set_index_buffer(pipe, &ib); - } - - i915_draw_vbo(pipe, &info); - - if (indexBuffer) - pipe->set_index_buffer(pipe, &saved_ib); -} - -static void -i915_draw_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned prim, unsigned start, unsigned count) -{ - i915_draw_range_elements(pipe, indexBuffer, - indexSize, indexBias, - 0, 0xffffffff, - prim, start, count); -} - -static void -i915_draw_arrays(struct pipe_context *pipe, - unsigned prim, unsigned start, unsigned count) -{ - i915_draw_elements(pipe, NULL, 0, 0, prim, start, count); -} - - - /* * Generic context functions @@ -203,9 +144,6 @@ i915_create_context(struct pipe_screen *screen, void *priv) i915->base.clear = i915_clear; - i915->base.draw_arrays = i915_draw_arrays; - i915->base.draw_elements = i915_draw_elements; - i915->base.draw_range_elements = i915_draw_range_elements; i915->base.draw_vbo = i915_draw_vbo; /* diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index fa7d047e0b..3ab9024c31 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -29,7 +29,6 @@ #include "util/u_inlines.h" #include "util/u_prim.h" #include "util/u_upload_mgr.h" -#include "util/u_draw_quad.h" #include "brw_draw.h" #include "brw_defines.h" @@ -220,74 +219,11 @@ brw_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) } } -static void -brw_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *index_buffer, - unsigned index_size, int index_bias, - unsigned min_index, - unsigned max_index, - unsigned mode, unsigned start, unsigned count) -{ - struct brw_context *brw = brw_context(pipe); - struct pipe_draw_info info; - struct pipe_index_buffer saved_ib, ib; - - util_draw_init_info(&info); - info.mode = mode; - info.start = start; - info.count = count; - info.index_bias = index_bias; - info.min_index = min_index; - info.max_index = max_index; - - if (index_buffer) { - info.indexed = TRUE; - saved_ib.buffer = brw->curr.index_buffer; - saved_ib.offset = brw->curr.index_offset; - saved_ib.index_size = brw->curr.index_size; - - ib.buffer = index_buffer; - ib.offset = 0; - ib.index_size = index_size; - pipe->set_index_buffer(pipe, &ib); - } - - brw_draw_vbo(pipe, &info); - - if (index_buffer) - pipe->set_index_buffer(pipe, &saved_ib); -} - -static void -brw_draw_elements(struct pipe_context *pipe, - struct pipe_resource *index_buffer, - unsigned index_size, int index_bias, - unsigned mode, - unsigned start, unsigned count) -{ - brw_draw_range_elements( pipe, index_buffer, - index_size, index_bias, - 0, 0xffffffff, - mode, - start, count ); -} - -static void -brw_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count) -{ - brw_draw_elements(pipe, NULL, 0, 0, mode, start, count); -} - - boolean brw_draw_init( struct brw_context *brw ) { /* Register our drawing function: */ - brw->base.draw_arrays = brw_draw_arrays; - brw->base.draw_elements = brw_draw_elements; - brw->base.draw_range_elements = brw_draw_range_elements; brw->base.draw_vbo = brw_draw_vbo; /* Create helpers for uploading data in user buffers: diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index e10d3a1413..de83c24905 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -45,71 +45,6 @@ identity_destroy(struct pipe_context *_pipe) FREE(id_pipe); } -static void -identity_draw_arrays(struct pipe_context *_pipe, - unsigned prim, - unsigned start, - unsigned count) -{ - struct identity_context *id_pipe = identity_context(_pipe); - struct pipe_context *pipe = id_pipe->pipe; - - pipe->draw_arrays(pipe, - prim, - start, - count); -} - -static void -identity_draw_elements(struct pipe_context *_pipe, - struct pipe_resource *_indexResource, - unsigned indexSize, - int indexBias, - unsigned prim, - unsigned start, - unsigned count) -{ - struct identity_context *id_pipe = identity_context(_pipe); - struct identity_resource *id_resource = identity_resource(_indexResource); - struct pipe_context *pipe = id_pipe->pipe; - struct pipe_resource *indexResource = id_resource->resource; - - pipe->draw_elements(pipe, - indexResource, - indexSize, - indexBias, - prim, - start, - count); -} - -static void -identity_draw_range_elements(struct pipe_context *_pipe, - struct pipe_resource *_indexResource, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) -{ - struct identity_context *id_pipe = identity_context(_pipe); - struct identity_resource *id_resource = identity_resource(_indexResource); - struct pipe_context *pipe = id_pipe->pipe; - struct pipe_resource *indexResource = id_resource->resource; - - pipe->draw_range_elements(pipe, - indexResource, - indexSize, - indexBias, - minIndex, - maxIndex, - mode, - start, - count); -} - static void identity_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info) @@ -917,9 +852,6 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.draw = NULL; id_pipe->base.destroy = identity_destroy; - id_pipe->base.draw_arrays = identity_draw_arrays; - id_pipe->base.draw_elements = identity_draw_elements; - id_pipe->base.draw_range_elements = identity_draw_range_elements; id_pipe->base.draw_vbo = identity_draw_vbo; id_pipe->base.create_query = identity_create_query; id_pipe->base.destroy_query = identity_destroy_query; diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index b6dbb9d288..22c2836e22 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -34,7 +34,6 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" #include "util/u_prim.h" -#include "util/u_draw_quad.h" #include "lp_context.h" #include "lp_state.h" @@ -109,158 +108,8 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) } -static void -llvmpipe_draw_range_elements_instanced(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount) -{ - struct llvmpipe_context *lp = llvmpipe_context(pipe); - struct pipe_draw_info info; - struct pipe_index_buffer saved_ib, ib; - - util_draw_init_info(&info); - info.mode = mode; - info.start = start; - info.count = count; - info.start_instance = startInstance; - info.instance_count = instanceCount; - - info.index_bias = indexBias; - info.min_index = minIndex; - info.max_index = maxIndex; - - if (indexBuffer) { - info.indexed = TRUE; - saved_ib = lp->index_buffer; - - ib.buffer = indexBuffer; - ib.offset = 0; - ib.index_size = indexSize; - pipe->set_index_buffer(pipe, &ib); - } - - llvmpipe_draw_vbo(pipe, &info); - - if (indexBuffer) - pipe->set_index_buffer(pipe, &saved_ib); -} - -static void -llvmpipe_draw_arrays_instanced(struct pipe_context *pipe, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount) -{ - llvmpipe_draw_range_elements_instanced(pipe, - NULL, /* no indexBuffer */ - 0, 0, /* indexSize, indexBias */ - 0, ~0, /* minIndex, maxIndex */ - mode, - start, - count, - startInstance, - instanceCount); -} - - -static void -llvmpipe_draw_elements_instanced(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount) -{ - llvmpipe_draw_range_elements_instanced(pipe, - indexBuffer, - indexSize, indexBias, - 0, ~0, /* minIndex, maxIndex */ - mode, - start, - count, - startInstance, - instanceCount); -} - - -static void -llvmpipe_draw_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned mode, - unsigned start, - unsigned count) -{ - llvmpipe_draw_range_elements_instanced(pipe, - indexBuffer, - indexSize, indexBias, - 0, 0xffffffff, /* min, maxIndex */ - mode, start, count, - 0, /* startInstance */ - 1); /* instanceCount */ -} - - -static void -llvmpipe_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned min_index, - unsigned max_index, - unsigned mode, - unsigned start, - unsigned count) -{ - llvmpipe_draw_range_elements_instanced(pipe, - indexBuffer, - indexSize, indexBias, - min_index, max_index, - mode, start, count, - 0, /* startInstance */ - 1); /* instanceCount */ -} - - -static void -llvmpipe_draw_arrays(struct pipe_context *pipe, - unsigned mode, - unsigned start, - unsigned count) -{ - llvmpipe_draw_range_elements_instanced(pipe, - NULL, /* indexBuffer */ - 0, /* indexSize */ - 0, /* indexBias */ - 0, ~0, /* min, maxIndex */ - mode, start, count, - 0, /* startInstance */ - 1); /* instanceCount */ -} - - void llvmpipe_init_draw_funcs(struct llvmpipe_context *llvmpipe) { - llvmpipe->pipe.draw_arrays = llvmpipe_draw_arrays; - llvmpipe->pipe.draw_elements = llvmpipe_draw_elements; - llvmpipe->pipe.draw_range_elements = llvmpipe_draw_range_elements; - llvmpipe->pipe.draw_arrays_instanced = llvmpipe_draw_arrays_instanced; - llvmpipe->pipe.draw_elements_instanced = llvmpipe_draw_elements_instanced; - llvmpipe->pipe.draw_vbo = llvmpipe_draw_vbo; } diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 3fc39c1137..0874cb5e4e 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -82,10 +82,6 @@ nv50_create(struct pipe_screen *pscreen, void *priv) nv50->pipe.destroy = nv50_destroy; - nv50->pipe.draw_arrays = nv50_draw_arrays; - nv50->pipe.draw_arrays_instanced = nv50_draw_arrays_instanced; - nv50->pipe.draw_elements = nv50_draw_elements; - nv50->pipe.draw_elements_instanced = nv50_draw_elements_instanced; nv50->pipe.draw_vbo = nv50_draw_vbo; nv50->pipe.clear = nv50_clear; diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index a7c2b5d487..d24d6c50ea 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -180,24 +180,6 @@ nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst, extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50); /* nv50_vbo.c */ -extern void nv50_draw_arrays(struct pipe_context *, unsigned mode, - unsigned start, unsigned count); -extern void nv50_draw_arrays_instanced(struct pipe_context *, unsigned mode, - unsigned start, unsigned count, - unsigned startInstance, - unsigned instanceCount); -extern void nv50_draw_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned mode, unsigned start, - unsigned count); -extern void nv50_draw_elements_instanced(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned mode, unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount); extern void nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info); extern void nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso); diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 11ffc182c2..e7f8fe33ed 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -173,7 +173,7 @@ instance_step(struct nv50_context *nv50, struct instance *a) } } -void +static void nv50_draw_arrays_instanced(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count, unsigned startInstance, unsigned instanceCount) @@ -220,13 +220,6 @@ nv50_draw_arrays_instanced(struct pipe_context *pipe, } } -void -nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, - unsigned count) -{ - nv50_draw_arrays_instanced(pipe, mode, start, count, 0, 1); -} - struct inline_ctx { struct nv50_context *nv50; void *map; @@ -384,7 +377,7 @@ nv50_draw_elements_inline(struct pipe_context *pipe, pipe_buffer_unmap(pipe, indexBuffer, transfer); } -void +static void nv50_draw_elements_instanced(struct pipe_context *pipe, struct pipe_resource *indexBuffer, unsigned indexSize, int indexBias, @@ -463,16 +456,6 @@ nv50_draw_elements_instanced(struct pipe_context *pipe, } } -void -nv50_draw_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned mode, unsigned start, unsigned count) -{ - nv50_draw_elements_instanced(pipe, indexBuffer, indexSize, indexBias, - mode, start, count, 0, 1); -} - void nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c index f30795f69a..7218abff22 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.c +++ b/src/gallium/drivers/nvfx/nvfx_context.c @@ -55,8 +55,6 @@ nvfx_create(struct pipe_screen *pscreen, void *priv) nvfx->pipe.screen = pscreen; nvfx->pipe.priv = priv; nvfx->pipe.destroy = nvfx_destroy; - nvfx->pipe.draw_arrays = nvfx_draw_arrays; - nvfx->pipe.draw_elements = nvfx_draw_elements; nvfx->pipe.draw_vbo = nvfx_draw_vbo; nvfx->pipe.clear = nvfx_clear; nvfx->pipe.flush = nvfx_flush; diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index d6cd272eed..89f94c10bd 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -236,13 +236,6 @@ extern void nvfx_init_transfer_functions(struct nvfx_context *nvfx); /* nvfx_vbo.c */ extern boolean nvfx_vbo_validate(struct nvfx_context *nvfx); extern void nvfx_vbo_relocate(struct nvfx_context *nvfx); -extern void nvfx_draw_arrays(struct pipe_context *, unsigned mode, - unsigned start, unsigned count); -extern void nvfx_draw_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned mode, unsigned start, - unsigned count); extern void nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info); diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c index 23a59b589b..4aa3793842 100644 --- a/src/gallium/drivers/nvfx/nvfx_vbo.c +++ b/src/gallium/drivers/nvfx/nvfx_vbo.c @@ -158,7 +158,7 @@ nvfx_vbo_static_attrib(struct nvfx_context *nvfx, pipe_buffer_unmap(&nvfx->pipe, vb->buffer, transfer); } -void +static void nvfx_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { @@ -463,7 +463,7 @@ nvfx_draw_elements_vbo(struct pipe_context *pipe, } } -void +static void nvfx_draw_elements(struct pipe_context *pipe, struct pipe_resource *indexBuffer, unsigned indexSize, int indexBias, diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index da96098cc4..c179b07e86 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -33,7 +33,6 @@ #include "util/u_memory.h" #include "util/u_upload_mgr.h" #include "util/u_prim.h" -#include "util/u_draw_quad.h" #include "r300_cs.h" #include "r300_context.h" @@ -567,19 +566,6 @@ static void r300_draw_range_elements(struct pipe_context* pipe, } } -/* Simple helpers for context setup. Should probably be moved to util. */ -static void r300_draw_elements(struct pipe_context* pipe, - struct pipe_resource* indexBuffer, - unsigned indexSize, int indexBias, unsigned mode, - unsigned start, unsigned count) -{ - struct r300_context *r300 = r300_context(pipe); - - pipe->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, - 0, r300->vertex_buffer_max_index, - mode, start, count); -} - static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, unsigned start, unsigned count) { @@ -735,54 +721,6 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, } } -/* SW TCL elements, using Draw. */ -static void r300_swtcl_draw_range_elements(struct pipe_context* pipe, - struct pipe_resource* indexBuffer, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) -{ - struct r300_context* r300 = r300_context(pipe); - struct pipe_draw_info info; - struct pipe_index_buffer saved_ib, ib; - - util_draw_init_info(&info); - info.mode = mode; - info.start = start; - info.count = count; - info.index_bias = indexBias; - info.min_index = minIndex; - info.max_index = maxIndex; - - if (indexBuffer) { - info.indexed = TRUE; - - saved_ib = r300->index_buffer; - ib.buffer = indexBuffer; - ib.offset = 0; - ib.index_size = indexSize; - pipe->set_index_buffer(pipe, &ib); - } - - r300_swtcl_draw_vbo(pipe, &info); - - if (indexBuffer) - pipe->set_index_buffer(pipe, &saved_ib); -} - -static void r300_swtcl_draw_arrays(struct pipe_context* pipe, - unsigned mode, - unsigned start, - unsigned count) -{ - r300_swtcl_draw_range_elements(pipe, NULL, 0, 0, - start, start + count -1, mode, start, count); -} - /* Object for rendering using Draw. */ struct r300_render { /* Parent class */ @@ -1179,17 +1117,10 @@ static void r300_resource_resolve(struct pipe_context* pipe, void r300_init_render_functions(struct r300_context *r300) { - /* Set generic functions. */ - r300->context.draw_elements = r300_draw_elements; - /* Set draw functions based on presence of HW TCL. */ if (r300->screen->caps.has_tcl) { - r300->context.draw_arrays = r300_draw_arrays; - r300->context.draw_range_elements = r300_draw_range_elements; r300->context.draw_vbo = r300_draw_vbo; } else { - r300->context.draw_arrays = r300_swtcl_draw_arrays; - r300->context.draw_range_elements = r300_swtcl_draw_range_elements; r300->context.draw_vbo = r300_swtcl_draw_vbo; } diff --git a/src/gallium/drivers/r300/r300_render_stencilref.c b/src/gallium/drivers/r300/r300_render_stencilref.c index 6d801cf159..1f035d64a2 100644 --- a/src/gallium/drivers/r300/r300_render_stencilref.c +++ b/src/gallium/drivers/r300/r300_render_stencilref.c @@ -34,14 +34,6 @@ #include "r300_reg.h" struct r300_stencilref_context { - void (*draw_arrays)(struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count); - - void (*draw_range_elements)( - struct pipe_context *pipe, struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex, - unsigned mode, unsigned start, unsigned count); - void (*draw_vbo)(struct pipe_context *pipe, const struct pipe_draw_info *info); @@ -108,45 +100,6 @@ static void r300_stencilref_end(struct r300_context *r300) r300->dsa_state.dirty = TRUE; } -static void r300_stencilref_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count) -{ - struct r300_context *r300 = r300_context(pipe); - struct r300_stencilref_context *sr = r300->stencilref_fallback; - - if (!r300_stencilref_needed(r300)) { - sr->draw_arrays(pipe, mode, start, count); - } else { - r300_stencilref_begin(r300); - sr->draw_arrays(pipe, mode, start, count); - r300_stencilref_switch_side(r300); - sr->draw_arrays(pipe, mode, start, count); - r300_stencilref_end(r300); - } -} - -static void r300_stencilref_draw_range_elements( - struct pipe_context *pipe, struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex, - unsigned mode, unsigned start, unsigned count) -{ - struct r300_context *r300 = r300_context(pipe); - struct r300_stencilref_context *sr = r300->stencilref_fallback; - - if (!r300_stencilref_needed(r300)) { - sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, - minIndex, maxIndex, mode, start, count); - } else { - r300_stencilref_begin(r300); - sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, - minIndex, maxIndex, mode, start, count); - r300_stencilref_switch_side(r300); - sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, - minIndex, maxIndex, mode, start, count); - r300_stencilref_end(r300); - } -} - static void r300_stencilref_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { @@ -168,13 +121,9 @@ void r300_plug_in_stencil_ref_fallback(struct r300_context *r300) { r300->stencilref_fallback = CALLOC_STRUCT(r300_stencilref_context); - /* Save original draw functions. */ - r300->stencilref_fallback->draw_arrays = r300->context.draw_arrays; - r300->stencilref_fallback->draw_range_elements = r300->context.draw_range_elements; + /* Save original draw function. */ r300->stencilref_fallback->draw_vbo = r300->context.draw_vbo; - /* Override the draw functions. */ - r300->context.draw_arrays = r300_stencilref_draw_arrays; - r300->context.draw_range_elements = r300_stencilref_draw_range_elements; + /* Override the draw function. */ r300->context.draw_vbo = r300_stencilref_draw_vbo; } diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index 2c2bd4672b..fc8aa1b866 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -313,9 +313,6 @@ struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) rctx->context.screen = screen; rctx->context.priv = priv; rctx->context.destroy = r600_destroy_context; - rctx->context.draw_arrays = r600_draw_arrays; - rctx->context.draw_elements = r600_draw_elements; - rctx->context.draw_range_elements = r600_draw_range_elements; rctx->context.draw_vbo = r600_draw_vbo; rctx->context.flush = r600_flush; diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index 9427c19d05..97c8a46bb0 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -191,17 +191,6 @@ struct r600_context_state *r600_context_state_decref(struct r600_context_state * int r600_context_hw_states(struct r600_context *rctx); -void r600_draw_arrays(struct pipe_context *ctx, unsigned mode, - unsigned start, unsigned count); -void r600_draw_elements(struct pipe_context *ctx, - struct pipe_resource *index_buffer, - unsigned index_size, int index_bias, unsigned mode, - unsigned start, unsigned count); -void r600_draw_range_elements(struct pipe_context *ctx, - struct pipe_resource *index_buffer, - unsigned index_size, int index_bias, unsigned min_index, - unsigned max_index, unsigned mode, - unsigned start, unsigned count); void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info); diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c index eeaa677edb..c52dfd3fb3 100644 --- a/src/gallium/drivers/r600/r600_draw.c +++ b/src/gallium/drivers/r600/r600_draw.c @@ -176,56 +176,6 @@ static int r600_draw_common(struct r600_draw *draw) return 0; } -void r600_draw_range_elements(struct pipe_context *ctx, - struct pipe_resource *index_buffer, - unsigned index_size, int index_bias, unsigned min_index, - unsigned max_index, unsigned mode, - unsigned start, unsigned count) -{ - struct r600_draw draw; - assert(index_bias == 0); - - draw.ctx = ctx; - draw.mode = mode; - draw.start = start; - draw.count = count; - draw.index_size = index_size; - draw.index_buffer = index_buffer; -printf("index_size %d min %d max %d start %d count %d\n", index_size, min_index, max_index, start, count); - r600_draw_common(&draw); -} - -void r600_draw_elements(struct pipe_context *ctx, - struct pipe_resource *index_buffer, - unsigned index_size, int index_bias, unsigned mode, - unsigned start, unsigned count) -{ - struct r600_draw draw; - assert(index_bias == 0); - - draw.ctx = ctx; - draw.mode = mode; - draw.start = start; - draw.count = count; - draw.index_size = index_size; - draw.index_buffer = index_buffer; - r600_draw_common(&draw); -} - -void r600_draw_arrays(struct pipe_context *ctx, unsigned mode, - unsigned start, unsigned count) -{ - struct r600_draw draw; - - draw.ctx = ctx; - draw.mode = mode; - draw.start = start; - draw.count = count; - draw.index_size = 0; - draw.index_buffer = NULL; - r600_draw_common(&draw); -} - void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { struct r600_context *rctx = r600_context(ctx); diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c index c748073b2a..3ffda87520 100644 --- a/src/gallium/drivers/rbug/rbug_context.c +++ b/src/gallium/drivers/rbug/rbug_context.c @@ -102,89 +102,6 @@ rbug_draw_block_locked(struct rbug_context *rb_pipe, int flag) } -static void -rbug_draw_arrays(struct pipe_context *_pipe, - unsigned prim, - unsigned start, - unsigned count) -{ - struct rbug_context *rb_pipe = rbug_context(_pipe); - struct pipe_context *pipe = rb_pipe->pipe; - - pipe_mutex_lock(rb_pipe->draw_mutex); - rbug_draw_block_locked(rb_pipe, RBUG_BLOCK_BEFORE); - - pipe->draw_arrays(pipe, - prim, - start, - count); - - rbug_draw_block_locked(rb_pipe, RBUG_BLOCK_AFTER); - pipe_mutex_unlock(rb_pipe->draw_mutex); -} - -static void -rbug_draw_elements(struct pipe_context *_pipe, - struct pipe_resource *_indexResource, - unsigned indexSize, - int indexBias, - unsigned prim, - unsigned start, - unsigned count) -{ - struct rbug_context *rb_pipe = rbug_context(_pipe); - struct rbug_resource *rb_resource = rbug_resource(_indexResource); - struct pipe_context *pipe = rb_pipe->pipe; - struct pipe_resource *indexResource = rb_resource->resource; - - pipe_mutex_lock(rb_pipe->draw_mutex); - rbug_draw_block_locked(rb_pipe, RBUG_BLOCK_BEFORE); - - pipe->draw_elements(pipe, - indexResource, - indexSize, - indexBias, - prim, - start, - count); - - rbug_draw_block_locked(rb_pipe, RBUG_BLOCK_AFTER); - pipe_mutex_unlock(rb_pipe->draw_mutex); -} - -static void -rbug_draw_range_elements(struct pipe_context *_pipe, - struct pipe_resource *_indexResource, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) -{ - struct rbug_context *rb_pipe = rbug_context(_pipe); - struct rbug_resource *rb_resource = rbug_resource(_indexResource); - struct pipe_context *pipe = rb_pipe->pipe; - struct pipe_resource *indexResource = rb_resource->resource; - - pipe_mutex_lock(rb_pipe->draw_mutex); - rbug_draw_block_locked(rb_pipe, RBUG_BLOCK_BEFORE); - - pipe->draw_range_elements(pipe, - indexResource, - indexSize, - indexBias, - minIndex, - maxIndex, - mode, - start, - count); - - rbug_draw_block_locked(rb_pipe, RBUG_BLOCK_AFTER); - pipe_mutex_unlock(rb_pipe->draw_mutex); -} - static void rbug_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info) { @@ -1072,9 +989,6 @@ rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) rb_pipe->base.draw = NULL; rb_pipe->base.destroy = rbug_destroy; - rb_pipe->base.draw_arrays = rbug_draw_arrays; - rb_pipe->base.draw_elements = rbug_draw_elements; - rb_pipe->base.draw_range_elements = rbug_draw_range_elements; rb_pipe->base.draw_vbo = rbug_draw_vbo; rb_pipe->base.create_query = rbug_create_query; rb_pipe->base.destroy_query = rbug_destroy_query; diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index fa1fae6f00..a7c9959b3e 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -284,11 +284,6 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->pipe.set_vertex_buffers = softpipe_set_vertex_buffers; softpipe->pipe.set_index_buffer = softpipe_set_index_buffer; - softpipe->pipe.draw_arrays = softpipe_draw_arrays; - softpipe->pipe.draw_elements = softpipe_draw_elements; - softpipe->pipe.draw_range_elements = softpipe_draw_range_elements; - softpipe->pipe.draw_arrays_instanced = softpipe_draw_arrays_instanced; - softpipe->pipe.draw_elements_instanced = softpipe_draw_elements_instanced; softpipe->pipe.draw_vbo = softpipe_draw_vbo; softpipe->pipe.draw_stream_output = softpipe_draw_stream_output; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 2855f55a0e..3a33cdef96 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -35,7 +35,6 @@ #include "pipe/p_context.h" #include "util/u_inlines.h" #include "util/u_prim.h" -#include "util/u_draw_quad.h" #include "sp_context.h" #include "sp_query.h" @@ -173,152 +172,3 @@ softpipe_draw_vbo(struct pipe_context *pipe, /* Note: leave drawing surfaces mapped */ sp->dirty_render_cache = TRUE; } - -static void -softpipe_draw_range_elements_instanced(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount) -{ - struct softpipe_context *sp = softpipe_context(pipe); - struct pipe_draw_info info; - struct pipe_index_buffer saved_ib, ib; - - util_draw_init_info(&info); - info.mode = mode; - info.start = start; - info.count = count; - info.start_instance = startInstance; - info.instance_count = instanceCount; - info.index_bias = indexBias; - info.min_index = minIndex; - info.max_index = maxIndex; - - if (indexBuffer) { - info.indexed = TRUE; - - saved_ib = sp->index_buffer; - ib.buffer = indexBuffer; - ib.offset = 0; - ib.index_size = indexSize; - pipe->set_index_buffer(pipe, &ib); - } - - softpipe_draw_vbo(pipe, &info); - - if (indexBuffer) - pipe->set_index_buffer(pipe, &saved_ib); -} - - -void -softpipe_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned min_index, - unsigned max_index, - unsigned mode, unsigned start, unsigned count) -{ - softpipe_draw_range_elements_instanced(pipe, - indexBuffer, - indexSize, - indexBias, - min_index, - max_index, - mode, - start, - count, - 0, - 1); -} - - -void -softpipe_draw_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned mode, unsigned start, unsigned count) -{ - softpipe_draw_range_elements_instanced(pipe, - indexBuffer, - indexSize, - indexBias, - 0, - 0xffffffff, - mode, - start, - count, - 0, - 1); -} - -void -softpipe_draw_arrays_instanced(struct pipe_context *pipe, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount) -{ - softpipe_draw_range_elements_instanced(pipe, - NULL, - 0, - 0, - 0, - 0xffffffff, - mode, - start, - count, - startInstance, - instanceCount); -} - -void -softpipe_draw_elements_instanced(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount) -{ - softpipe_draw_range_elements_instanced(pipe, - indexBuffer, - indexSize, - indexBias, - 0, - 0xffffffff, - mode, - start, - count, - startInstance, - instanceCount); -} - -void -softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count) -{ - softpipe_draw_range_elements_instanced(pipe, - NULL, - 0, - 0, - 0, - 0xffffffff, - mode, - start, - count, - 0, - 1); -} - diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index f04b0a5d31..39d204de8a 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -228,41 +228,6 @@ void softpipe_set_index_buffer(struct pipe_context *, void softpipe_update_derived( struct softpipe_context *softpipe ); -void softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count); - -void softpipe_draw_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned mode, unsigned start, unsigned count); -void -softpipe_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned min_index, - unsigned max_index, - unsigned mode, unsigned start, unsigned count); - -void -softpipe_draw_arrays_instanced(struct pipe_context *pipe, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount); - -void -softpipe_draw_elements_instanced(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount); - void softpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info); diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c index fceaa83d70..de08bc5e56 100644 --- a/src/gallium/drivers/svga/svga_pipe_draw.c +++ b/src/gallium/drivers/svga/svga_pipe_draw.c @@ -226,28 +226,6 @@ svga_draw_range_elements( struct pipe_context *pipe, } -static void -svga_draw_elements( struct pipe_context *pipe, - struct pipe_resource *index_buffer, - unsigned index_size, int index_bias, - unsigned prim, unsigned start, unsigned count) -{ - svga_draw_range_elements( pipe, index_buffer, - index_size, index_bias, - 0, 0xffffffff, - prim, start, count ); -} - -static void -svga_draw_arrays( struct pipe_context *pipe, - unsigned prim, unsigned start, unsigned count) -{ - svga_draw_range_elements(pipe, NULL, 0, 0, - start, start + count - 1, - prim, - start, count); -} - static void svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { @@ -274,8 +252,5 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) void svga_init_draw_functions( struct svga_context *svga ) { - svga->pipe.draw_arrays = svga_draw_arrays; - svga->pipe.draw_elements = svga_draw_elements; - svga->pipe.draw_range_elements = svga_draw_range_elements; svga->pipe.draw_vbo = svga_draw_vbo; } diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 91c9bf0999..84e5a6a824 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -82,91 +82,6 @@ trace_surface_unwrap(struct trace_context *tr_ctx, } -static INLINE void -trace_context_draw_arrays(struct pipe_context *_pipe, - unsigned mode, unsigned start, unsigned count) -{ - struct trace_context *tr_ctx = trace_context(_pipe); - struct pipe_context *pipe = tr_ctx->pipe; - - trace_dump_call_begin("pipe_context", "draw_arrays"); - - trace_dump_arg(ptr, pipe); - trace_dump_arg(uint, mode); - trace_dump_arg(uint, start); - trace_dump_arg(uint, count); - - pipe->draw_arrays(pipe, mode, start, count); - - trace_dump_call_end(); -} - - -static INLINE void -trace_context_draw_elements(struct pipe_context *_pipe, - struct pipe_resource *_indexBuffer, - unsigned indexSize, int indexBias, - unsigned mode, unsigned start, unsigned count) -{ - struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_resource *tr_buf = trace_resource(_indexBuffer); - struct pipe_context *pipe = tr_ctx->pipe; - struct pipe_resource *indexBuffer = tr_buf->resource; - - trace_dump_call_begin("pipe_context", "draw_elements"); - - trace_dump_arg(ptr, pipe); - trace_dump_arg(ptr, indexBuffer); - trace_dump_arg(uint, indexSize); - trace_dump_arg(int, indexBias); - trace_dump_arg(uint, mode); - trace_dump_arg(uint, start); - trace_dump_arg(uint, count); - - pipe->draw_elements(pipe, indexBuffer, indexSize, indexBias, - mode, start, count); - - trace_dump_call_end(); -} - - -static INLINE void -trace_context_draw_range_elements(struct pipe_context *_pipe, - struct pipe_resource *_indexBuffer, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) -{ - struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_resource *tr_buf = trace_resource(_indexBuffer); - struct pipe_context *pipe = tr_ctx->pipe; - struct pipe_resource *indexBuffer = tr_buf->resource; - - trace_dump_call_begin("pipe_context", "draw_range_elements"); - - trace_dump_arg(ptr, pipe); - trace_dump_arg(ptr, indexBuffer); - trace_dump_arg(uint, indexSize); - trace_dump_arg(int, indexBias); - trace_dump_arg(uint, minIndex); - trace_dump_arg(uint, maxIndex); - trace_dump_arg(uint, mode); - trace_dump_arg(uint, start); - trace_dump_arg(uint, count); - - pipe->draw_range_elements(pipe, - indexBuffer, indexSize, indexBias, - minIndex, maxIndex, - mode, start, count); - - trace_dump_call_end(); -} - - static INLINE void trace_context_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info) @@ -1483,9 +1398,6 @@ trace_context_create(struct trace_screen *tr_scr, tr_ctx->base.screen = &tr_scr->base; tr_ctx->base.destroy = trace_context_destroy; - tr_ctx->base.draw_arrays = trace_context_draw_arrays; - tr_ctx->base.draw_elements = trace_context_draw_elements; - tr_ctx->base.draw_range_elements = trace_context_draw_range_elements; tr_ctx->base.draw_vbo = trace_context_draw_vbo; tr_ctx->base.create_query = trace_context_create_query; tr_ctx->base.destroy_query = trace_context_destroy_query; diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 3314b1e0e0..0579962ec6 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -61,47 +61,6 @@ struct pipe_context { * VBO drawing */ /*@{*/ - void (*draw_arrays)( struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count); - - void (*draw_elements)( struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned mode, unsigned start, unsigned count); - - void (*draw_arrays_instanced)(struct pipe_context *pipe, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount); - - void (*draw_elements_instanced)(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount); - - /* XXX: this is (probably) a temporary entrypoint, as the range - * information should be available from the vertex_buffer state. - * Using this to quickly evaluate a specialized path in the draw - * module. - */ - void (*draw_range_elements)( struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count); - void (*draw_vbo)( struct pipe_context *pipe, const struct pipe_draw_info *info ); -- cgit v1.2.3 From 92f9b05499de9f0e8eda11d1a75ba7955c9cc602 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Thu, 29 Jul 2010 15:54:10 +0800 Subject: gallium: Avoid void pointer arithmetic. This fixes fdo bug #29286. --- src/gallium/drivers/i915/i915_context.c | 4 ++-- src/gallium/drivers/llvmpipe/lp_draw_arrays.c | 4 ++-- src/gallium/drivers/r300/r300_render.c | 2 +- src/gallium/drivers/softpipe/sp_draw_arrays.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 496efc99da..2beb9e3091 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -67,8 +67,8 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) * Map index buffer, if present */ if (info->indexed && i915->index_buffer.buffer) { - mapped_indices = i915_buffer(i915->index_buffer.buffer)->data; - mapped_indices += i915->index_buffer.offset; + char *indices = (char *) i915_buffer(i915->index_buffer.buffer)->data; + mapped_indices = (void *) (indices + i915->index_buffer.offset); } draw_set_mapped_element_buffer_range(draw, (mapped_indices) ? diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index 22c2836e22..e73b431cb4 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -69,8 +69,8 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) /* Map index buffer, if present */ if (info->indexed && lp->index_buffer.buffer) { - mapped_indices = llvmpipe_resource_data(lp->index_buffer.buffer); - mapped_indices += lp->index_buffer.offset; + char *indices = (char *) llvmpipe_resource_data(lp->index_buffer.buffer); + mapped_indices = (void *) (indices + lp->index_buffer.offset); } draw_set_mapped_element_buffer_range(draw, (mapped_indices) ? diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index c179b07e86..35d7756584 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -692,7 +692,7 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, indices = pipe_buffer_map(pipe, r300->index_buffer.buffer, PIPE_TRANSFER_READ, &ib_transfer); if (indices) - indices += r300->index_buffer.offset; + indices = (void *) ((char *) indices + r300->index_buffer.offset); } draw_set_mapped_element_buffer_range(r300->draw, (indices) ? diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 3a33cdef96..386c8acb8c 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -139,8 +139,8 @@ softpipe_draw_vbo(struct pipe_context *pipe, /* Map index buffer, if present */ if (info->indexed && sp->index_buffer.buffer) { - mapped_indices = softpipe_resource(sp->index_buffer.buffer)->data; - mapped_indices += sp->index_buffer.offset; + char *indices = (char *) softpipe_resource(sp->index_buffer.buffer)->data; + mapped_indices = (void *) (indices + sp->index_buffer.offset); } draw_set_mapped_element_buffer_range(draw, (mapped_indices) ? -- cgit v1.2.3 From e3d2ebac115f7b7899664fefc2652fb829acfa27 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 22 Jul 2010 18:37:46 +0100 Subject: llvmpipe: Avoid corrupting the FPU stack with MMX instructions on 32bit OSes. Unfortunately LLVM doesn't emit EMMS itself, and there is no easy/effective way to disable MMX. http://llvm.org/bugs/show_bug.cgi?id=3287 --- src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 24 ++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_state_fs.c | 5 +++++ 2 files changed, 29 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 5a9488b5f7..072408b268 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #include "pipe/p_config.h" #include "util/u_debug.h" @@ -141,4 +142,27 @@ lp_set_target_options(void) #if 0 llvm::UnsafeFPMath = true; #endif + +#if 0 + /* + * LLVM will generate MMX instructions for vectors <= 64 bits, leading to + * innefficient code, and in 32bit systems, to the corruption of the FPU + * stack given that it expects the user to generate the EMMS instructions. + * + * See also: + * - http://llvm.org/bugs/show_bug.cgi?id=3287 + * - http://l4.me.uk/post/2009/06/07/llvm-wrinkle-3-configuration-what-configuration/ + * + * XXX: Unfortunately this is not working. + */ + static boolean first = FALSE; + if (first) { + static const char* options[] = { + "prog", + "-disable-mmx" + }; + llvm::cl::ParseCommandLineOptions(2, const_cast(options)); + first = FALSE; + } +#endif } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 5953d690a4..dbcc286417 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -676,6 +676,11 @@ generate_fragment(struct llvmpipe_context *lp, color_ptr); } +#ifdef PIPE_ARCH_X86 + /* Avoid corrupting the FPU stack on 32bit OSes. */ + lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); +#endif + LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); -- cgit v1.2.3 From bdaa8be5b7dce396dd98f05e7ad66d48633cc4cb Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 20 Jul 2010 13:40:01 +0100 Subject: scons: Use the current python executable for code generation. Less susceptible to be broken. --- src/gallium/auxiliary/SConscript | 4 ++-- src/gallium/drivers/llvmpipe/SConscript | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 72a16617db..8381ae5b3e 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -34,14 +34,14 @@ env.CodeGenerate( target = 'util/u_format_table.c', script = '#src/gallium/auxiliary/util/u_format_table.py', source = ['#src/gallium/auxiliary/util/u_format.csv'], - command = 'python $SCRIPT $SOURCE > $TARGET' + command = python_cmd + ' $SCRIPT $SOURCE > $TARGET' ) env.CodeGenerate( target = 'util/u_half.c', script = 'util/u_half.py', source = [], - command = 'python $SCRIPT > $TARGET' + command = python_cmd + ' $SCRIPT > $TARGET' ) env.Depends('util/u_format_table.c', [ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index fd6ba1561e..5583fca38e 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -1,3 +1,4 @@ +from sys import executable as python_cmd import distutils.version Import('*') @@ -16,7 +17,7 @@ env.CodeGenerate( target = 'lp_tile_soa.c', script = 'lp_tile_soa.py', source = ['#src/gallium/auxiliary/util/u_format.csv'], - command = 'python $SCRIPT $SOURCE > $TARGET' + command = python_cmd + ' $SCRIPT $SOURCE > $TARGET' ) # XXX: Our dependency scanner only finds depended modules in relative dirs. -- cgit v1.2.3 From d05cb9f0187984e461b41eb1ba6ca2adf0593c74 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Jul 2010 12:49:12 -0600 Subject: llvmpipe: don't call LLVMCreateJITCompiler() twice Fixes a failed assertion with LLVM 2.6: ::JITResolver::JITResolver(llvm::JIT&): Assertion `TheJITResolver == 0&& "Multiple JIT resolvers?"' failed. Though, not everyone seems to experience this problem. --- src/gallium/drivers/llvmpipe/lp_test_blend.c | 14 ++------------ src/gallium/drivers/llvmpipe/lp_test_conv.c | 14 ++------------ src/gallium/drivers/llvmpipe/lp_test_printf.c | 6 ++++++ src/gallium/drivers/llvmpipe/lp_test_round.c | 12 ++---------- src/gallium/drivers/llvmpipe/lp_test_sincos.c | 13 ++----------- 5 files changed, 14 insertions(+), 45 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 0c95555655..d0389f0cb0 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -37,6 +37,7 @@ */ +#include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_type.h" #include "gallivm/lp_bld_debug.h" #include "lp_bld_blend.h" @@ -485,8 +486,7 @@ test_one(unsigned verbose, { LLVMModuleRef module = NULL; LLVMValueRef func = NULL; - LLVMExecutionEngineRef engine = NULL; - LLVMModuleProviderRef provider = NULL; + LLVMExecutionEngineRef engine = lp_build_engine; LLVMPassManagerRef pass = NULL; char *error = NULL; blend_test_ptr_t blend_test_ptr; @@ -510,15 +510,6 @@ test_one(unsigned verbose, } LLVMDisposeMessage(error); - provider = LLVMCreateModuleProviderForExistingModule(module); - if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { - if(verbose < 1) - dump_blend_type(stderr, blend, mode, type); - fprintf(stderr, "%s\n", error); - LLVMDisposeMessage(error); - abort(); - } - #if 0 pass = LLVMCreatePassManager(); LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); @@ -735,7 +726,6 @@ test_one(unsigned verbose, LLVMFreeMachineCodeForFunction(engine, func); - LLVMDisposeExecutionEngine(engine); if(pass) LLVMDisposePassManager(pass); diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index cf41b40581..3ba42bf11a 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -35,6 +35,7 @@ #include "util/u_pointer.h" +#include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_type.h" #include "gallivm/lp_bld_const.h" #include "gallivm/lp_bld_conv.h" @@ -152,8 +153,7 @@ test_one(unsigned verbose, { LLVMModuleRef module = NULL; LLVMValueRef func = NULL; - LLVMExecutionEngineRef engine = NULL; - LLVMModuleProviderRef provider = NULL; + LLVMExecutionEngineRef engine = lp_build_engine; LLVMPassManagerRef pass = NULL; char *error = NULL; conv_test_ptr_t conv_test_ptr; @@ -203,15 +203,6 @@ test_one(unsigned verbose, } LLVMDisposeMessage(error); - provider = LLVMCreateModuleProviderForExistingModule(module); - if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { - if(verbose < 1) - dump_conv_types(stderr, src_type, dst_type); - fprintf(stderr, "%s\n", error); - LLVMDisposeMessage(error); - abort(); - } - #if 0 pass = LLVMCreatePassManager(); LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); @@ -351,7 +342,6 @@ test_one(unsigned verbose, LLVMFreeMachineCodeForFunction(engine, func); - LLVMDisposeExecutionEngine(engine); if(pass) LLVMDisposePassManager(pass); diff --git a/src/gallium/drivers/llvmpipe/lp_test_printf.c b/src/gallium/drivers/llvmpipe/lp_test_printf.c index 21df83f9d8..62041f0301 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_printf.c +++ b/src/gallium/drivers/llvmpipe/lp_test_printf.c @@ -31,6 +31,7 @@ #include "util/u_pointer.h" #include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_printf.h" #include @@ -74,6 +75,7 @@ add_printf_test(LLVMModuleRef module) lp_build_printf(builder, "hello, world\n"); lp_build_printf(builder, "print 5 6: %d %d\n", LLVMConstInt(LLVMInt32Type(), 5, 0), LLVMConstInt(LLVMInt32Type(), 6, 0)); + LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); return func; @@ -107,11 +109,15 @@ test_printf(unsigned verbose, FILE *fp, const struct printf_test_case *testcase) LLVMDisposeMessage(error); provider = LLVMCreateModuleProviderForExistingModule(module); +#if 0 if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { fprintf(stderr, "%s\n", error); LLVMDisposeMessage(error); abort(); } +#else + engine = lp_build_engine; +#endif #if 0 pass = LLVMCreatePassManager(); diff --git a/src/gallium/drivers/llvmpipe/lp_test_round.c b/src/gallium/drivers/llvmpipe/lp_test_round.c index f571a81a4a..57b0ee5776 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_round.c +++ b/src/gallium/drivers/llvmpipe/lp_test_round.c @@ -31,7 +31,7 @@ #include "util/u_pointer.h" #include "gallivm/lp_bld.h" -#include "gallivm/lp_bld_printf.h" +#include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_arit.h" #include @@ -121,8 +121,7 @@ test_round(unsigned verbose, FILE *fp) { LLVMModuleRef module = NULL; LLVMValueRef test_round = NULL, test_trunc, test_floor, test_ceil; - LLVMExecutionEngineRef engine = NULL; - LLVMModuleProviderRef provider = NULL; + LLVMExecutionEngineRef engine = lp_build_engine; LLVMPassManagerRef pass = NULL; char *error = NULL; test_round_t round_func, trunc_func, floor_func, ceil_func; @@ -145,13 +144,6 @@ test_round(unsigned verbose, FILE *fp) } LLVMDisposeMessage(error); - provider = LLVMCreateModuleProviderForExistingModule(module); - if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { - fprintf(stderr, "%s\n", error); - LLVMDisposeMessage(error); - abort(); - } - #if 0 pass = LLVMCreatePassManager(); LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); diff --git a/src/gallium/drivers/llvmpipe/lp_test_sincos.c b/src/gallium/drivers/llvmpipe/lp_test_sincos.c index 1366ecddcb..e93c1b7859 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_sincos.c +++ b/src/gallium/drivers/llvmpipe/lp_test_sincos.c @@ -30,7 +30,7 @@ #include #include "gallivm/lp_bld.h" -#include "gallivm/lp_bld_printf.h" +#include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_arit.h" #include @@ -101,8 +101,7 @@ test_sincos(unsigned verbose, FILE *fp) { LLVMModuleRef module = NULL; LLVMValueRef test_sin = NULL, test_cos = NULL; - LLVMExecutionEngineRef engine = NULL; - LLVMModuleProviderRef provider = NULL; + LLVMExecutionEngineRef engine = lp_build_engine; LLVMPassManagerRef pass = NULL; char *error = NULL; test_sincos_t sin_func; @@ -122,13 +121,6 @@ test_sincos(unsigned verbose, FILE *fp) } LLVMDisposeMessage(error); - provider = LLVMCreateModuleProviderForExistingModule(module); - if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { - fprintf(stderr, "%s\n", error); - LLVMDisposeMessage(error); - abort(); - } - #if 0 pass = LLVMCreatePassManager(); LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); @@ -162,7 +154,6 @@ test_sincos(unsigned verbose, FILE *fp) LLVMFreeMachineCodeForFunction(engine, test_sin); LLVMFreeMachineCodeForFunction(engine, test_cos); - LLVMDisposeExecutionEngine(engine); if(pass) LLVMDisposePassManager(pass); -- cgit v1.2.3 From 330852b3b33883b8fb22ce8c67efae79e64ce273 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Jul 2010 12:51:34 -0600 Subject: llvmpipe: also test the new lp_build_assert() function --- src/gallium/drivers/llvmpipe/lp_test_printf.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_test_printf.c b/src/gallium/drivers/llvmpipe/lp_test_printf.c index 62041f0301..a3447bf53f 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_printf.c +++ b/src/gallium/drivers/llvmpipe/lp_test_printf.c @@ -32,6 +32,7 @@ #include "util/u_pointer.h" #include "gallivm/lp_bld.h" #include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_assert.h" #include "gallivm/lp_bld_printf.h" #include @@ -76,6 +77,9 @@ add_printf_test(LLVMModuleRef module) lp_build_printf(builder, "print 5 6: %d %d\n", LLVMConstInt(LLVMInt32Type(), 5, 0), LLVMConstInt(LLVMInt32Type(), 6, 0)); + /* Also test lp_build_assert(). This should not fail. */ + lp_build_assert(builder, LLVMConstInt(LLVMInt32Type(), 1, 0), "assert(1)"); + LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); return func; -- cgit v1.2.3 From 3f4680d8e229d87e62972d0632c577873944d89d Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 29 Jul 2010 19:53:36 +0200 Subject: r300g/swtcl: fix crash after the draw_vbo merge --- src/gallium/drivers/r300/r300_render.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 35d7756584..987fbaf6a4 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -665,7 +665,7 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS]; - struct pipe_transfer *ib_transfer; + struct pipe_transfer *ib_transfer = NULL; unsigned count = info->count; int i; void* indices = NULL; -- cgit v1.2.3 From d8d7a3e0f9629a220e2394dd7c6634f2d6a93e20 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 29 Jul 2010 20:52:17 +0200 Subject: r300g/swtcl: fix crash in ETQW and minor fixups The Draw flush inside r300_flush was the culprit. Also, no need to flush Draw when changing a state since the flush is already inside swtcl_draw_vbo. --- src/gallium/drivers/r300/r300_flush.c | 8 -------- src/gallium/drivers/r300/r300_render.c | 22 ++++++++++++++-------- src/gallium/drivers/r300/r300_state.c | 10 ---------- 3 files changed, 14 insertions(+), 26 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index ae7b5759e7..fe182b6615 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -43,14 +43,6 @@ static void r300_flush(struct pipe_context* pipe, u_upload_flush(r300->upload_vb); u_upload_flush(r300->upload_ib); - /* We probably need to flush Draw, but we may have been called from - * within Draw. This feels kludgy, but it might be the best thing. - * - * Of course, the best thing is to kill Draw with fire. :3 */ - if (r300->draw && !r300->draw->flushing) { - draw_flush(r300->draw); - } - if (r300->dirty_hw) { r300_emit_hyperz_end(r300); r300_emit_query_end(r300); diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 987fbaf6a4..7c4294bc9f 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -681,11 +681,13 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, r300_update_derived_state(r300); for (i = 0; i < r300->vertex_buffer_count; i++) { - void* buf = pipe_buffer_map(pipe, - r300->vertex_buffer[i].buffer, - PIPE_TRANSFER_READ, - &vb_transfer[i]); - draw_set_mapped_vertex_buffer(r300->draw, i, buf); + if (r300->vertex_buffer[i].buffer) { + void *buf = pipe_buffer_map(pipe, + r300->vertex_buffer[i].buffer, + PIPE_TRANSFER_READ, + &vb_transfer[i]); + draw_set_mapped_vertex_buffer(r300->draw, i, buf); + } } if (info->indexed && r300->index_buffer.buffer) { @@ -709,9 +711,11 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, draw_flush(r300->draw); for (i = 0; i < r300->vertex_buffer_count; i++) { - pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, - vb_transfer[i]); - draw_set_mapped_vertex_buffer(r300->draw, i, NULL); + if (r300->vertex_buffer[i].buffer) { + pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, + vb_transfer[i]); + draw_set_mapped_vertex_buffer(r300->draw, i, NULL); + } } if (ib_transfer) { @@ -796,6 +800,8 @@ static void* r300_render_map_vertices(struct vbuf_render* render) PIPE_TRANSFER_WRITE, &r300render->vbo_transfer); + assert(r300render->vbo_ptr); + return ((uint8_t*)r300render->vbo_ptr + r300render->vbo_offset); } diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index bccd7d7859..fced77e6f1 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -446,7 +446,6 @@ static void r300_set_clip_state(struct pipe_context* pipe, r300->clip_state.dirty = TRUE; } else { - draw_flush(r300->draw); draw_set_clip_state(r300->draw, state); } } @@ -728,10 +727,6 @@ static void return; } - if (r300->draw) { - draw_flush(r300->draw); - } - /* If nr_cbufs is changed from zero to non-zero or vice versa... */ if (!!old_state->nr_cbufs != !!state->nr_cbufs) { r300->blend_state.dirty = TRUE; @@ -1096,7 +1091,6 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) boolean last_two_sided_color = r300->two_sided_color; if (r300->draw && rs) { - draw_flush(r300->draw); draw_set_rasterizer_state(r300->draw, &rs->rs_draw, state); } @@ -1385,7 +1379,6 @@ static void r300_set_viewport_state(struct pipe_context* pipe, r300->viewport = *state; if (r300->draw) { - draw_flush(r300->draw); draw_set_viewport_state(r300->draw, state); viewport->vte_control = R300_VTX_XY_FMT | R300_VTX_Z_FMT; return; @@ -1486,7 +1479,6 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, } else { /* SW TCL. */ - draw_flush(r300->draw); draw_set_vertex_buffers(r300->draw, count, buffers); } @@ -1671,7 +1663,6 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe, r300->velems = velems; if (r300->draw) { - draw_flush(r300->draw); draw_set_vertex_elements(r300->draw, velems->count, velems->velem); return; } @@ -1737,7 +1728,6 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) r300->pvs_flush.dirty = TRUE; } else { - draw_flush(r300->draw); draw_bind_vertex_shader(r300->draw, (struct draw_vertex_shader*)vs->draw_vs); } -- cgit v1.2.3 From 7a73390f9126fd270d9891cd9d2bf38ef56d9b80 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Thu, 29 Jul 2010 14:51:06 -0400 Subject: r600g: mipmap early support + EX2/ABS instruction + culling Add mipmap support (demos/src/redbook/mipmap is working) Add EX2/ABS shader instruction support. Add face culling support. Misc fixes. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_asm.c | 2 ++ src/gallium/drivers/r600/r600_resource.h | 4 +-- src/gallium/drivers/r600/r600_shader.c | 16 +++++---- src/gallium/drivers/r600/r600_state.c | 56 ++++++++++++++++++++++++-------- src/gallium/drivers/r600/r600_texture.c | 24 +++++++------- src/gallium/drivers/r600/r600d.h | 40 +++++++++++++++++++++++ 6 files changed, 108 insertions(+), 34 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index e678a2fdf2..e560f65dcd 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -294,6 +294,7 @@ int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) S_SQ_ALU_WORD0_LAST(alu->last); bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | + S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) | S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) | S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) | S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) | @@ -309,6 +310,7 @@ int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) S_SQ_ALU_WORD0_LAST(alu->last); bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | + S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) | S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 0139a3b777..bb90e76fb7 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -44,9 +44,9 @@ struct r600_resource_texture { struct r600_resource resource; unsigned long offset[PIPE_MAX_TEXTURE_LEVELS]; unsigned long pitch[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long stride[PIPE_MAX_TEXTURE_LEVELS]; unsigned long layer_size[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long stride_override; + unsigned long pitch_override; + unsigned long bpt; unsigned long size; }; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 3f1979b9cc..c61cc11e88 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -249,10 +249,6 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); return -EINVAL; } - if (i->Instruction.Saturate) { - R600_ERR("staturate unsupported\n"); - return -EINVAL; - } if (i->Instruction.Predicate) { R600_ERR("predicate unsupported\n"); return -EINVAL; @@ -507,10 +503,15 @@ static int tgsi_dst(struct r600_shader_ctx *ctx, unsigned swizzle, struct r600_bc_alu_dst *r600_dst) { + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + r600_dst->sel = tgsi_dst->Register.Index; r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; r600_dst->chan = swizzle; r600_dst->write = 1; + if (inst->Instruction.Saturate) { + r600_dst->clamp = 1; + } return 0; } @@ -540,6 +541,9 @@ static int tgsi_op2(struct r600_shader_ctx *ctx) case TGSI_OPCODE_SUB: alu.src[1].neg = 1; break; + case TGSI_OPCODE_ABS: + alu.src[0].abs = 1; + break; default: break; } @@ -1040,13 +1044,13 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans}, {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* gap */ {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 57879e8d8b..0191070daa 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -24,6 +24,7 @@ * Jerome Glisse */ #include +#include #include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_memory.h" @@ -649,8 +650,8 @@ static struct radeon_state *r600_cb0(struct r600_context *rctx) rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; rstate->placement[4] = RADEON_GEM_DOMAIN_GTT; rstate->nbo = 3; - pitch = rtex->pitch[level] / 8 - 1; - slice = rtex->pitch[level] * state->cbufs[0]->height / 64 - 1; + pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; + slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[0]->height / 64 - 1; rstate->states[R600_CB0__CB_COLOR0_BASE] = 0x00000000; rstate->states[R600_CB0__CB_COLOR0_INFO] = 0x08110068; rstate->states[R600_CB0__CB_COLOR0_SIZE] = S_028060_PITCH_TILE_MAX(pitch) | @@ -666,6 +667,22 @@ static struct radeon_state *r600_cb0(struct r600_context *rctx) return rstate; } +int r600_db_format(unsigned pformat, unsigned *format) +{ + switch (pformat) { + case PIPE_FORMAT_Z24X8_UNORM: + *format = V_028010_DEPTH_X8_24; + return 0; + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + *format = V_028010_DEPTH_8_24; + return 0; + default: + *format = V_028010_DEPTH_INVALID; + R600_ERR("unsupported %d\n", pformat); + return -EINVAL; + } +} + static struct radeon_state *r600_db(struct r600_context *rctx) { struct r600_screen *rscreen = rctx->screen; @@ -674,7 +691,7 @@ static struct radeon_state *r600_db(struct r600_context *rctx) struct radeon_state *rstate; const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer; unsigned level = state->cbufs[0]->level; - unsigned pitch, slice; + unsigned pitch, slice, format; if (state->zsbuf == NULL) return NULL; @@ -689,10 +706,15 @@ static struct radeon_state *r600_db(struct r600_context *rctx) rstate->nbo = 1; rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM; level = state->zsbuf->level; - pitch = rtex->pitch[level] / 8 - 1; - slice = rtex->pitch[level] * state->zsbuf->height / 64 - 1; + pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; + slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; + if (r600_db_format(state->zsbuf->texture->format, &format)) { + radeon_state_decref(rstate); + return NULL; + } rstate->states[R600_DB__DB_DEPTH_BASE] = 0x00000000; - rstate->states[R600_DB__DB_DEPTH_INFO] = 0x00010006; + rstate->states[R600_DB__DB_DEPTH_INFO] = 0x00010000 | + S_028010_FORMAT(format); rstate->states[R600_DB__DB_DEPTH_VIEW] = 0x00000000; rstate->states[R600_DB__DB_PREFETCH_LIMIT] = (state->zsbuf->height / 8) -1; rstate->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) | @@ -716,7 +738,10 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) return NULL; rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000001; rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0x00000000; - rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = 0x00080000; + rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = 0x00080000 | + S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | + S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | + S_028814_FACE(!state->front_ccw); rstate->states[R600_RASTERIZER__PA_CL_VS_OUT_CNTL] = 0x00000000; rstate->states[R600_RASTERIZER__PA_CL_NANINF_CNTL] = 0x00000000; rstate->states[R600_RASTERIZER__PA_SU_POINT_SIZE] = 0x00080008; @@ -910,6 +935,11 @@ static inline unsigned r600_tex_compare(unsigned compare) } } +static INLINE u32 S_FIXED(float value, u32 frac_bits) +{ + return value * (1 << frac_bits); +} + static struct radeon_state *r600_sampler(struct r600_context *rctx, const struct pipe_sampler_state *state, unsigned id) @@ -930,9 +960,9 @@ static struct radeon_state *r600_sampler(struct r600_context *rctx, S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)); /* FIXME LOD it depends on texture base level ... */ rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0] = - S_03C004_MIN_LOD(0) | - S_03C004_MAX_LOD(0) | - S_03C004_LOD_BIAS(0); + S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | + S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | + S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)); rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0] = S_03C008_TYPE(1); if (radeon_state_pm4(rstate)) { radeon_state_decref(rstate); @@ -1020,7 +1050,7 @@ static struct radeon_state *r600_resource(struct r600_context *rctx, /* FIXME properly handle first level != 0 */ rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = S_038000_DIM(r600_tex_dim(view->texture->target)) | - S_038000_PITCH((tmp->pitch[0] / 8) - 1) | + S_038000_PITCH(((tmp->pitch[0] / tmp->bpt) / 8) - 1) | S_038000_TEX_WIDTH(view->texture->width0 - 1); rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = S_038004_TEX_HEIGHT(view->texture->height0 - 1) | @@ -1036,9 +1066,9 @@ static struct radeon_state *r600_resource(struct r600_context *rctx, S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) | S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) | S_038010_REQUEST_SIZE(1) | - S_038010_DST_SEL_X(r600_tex_swizzle(view->swizzle_r)) | + S_038010_DST_SEL_X(r600_tex_swizzle(view->swizzle_b)) | S_038010_DST_SEL_Y(r600_tex_swizzle(view->swizzle_g)) | - S_038010_DST_SEL_Z(r600_tex_swizzle(view->swizzle_b)) | + S_038010_DST_SEL_Z(r600_tex_swizzle(view->swizzle_r)) | S_038010_DST_SEL_W(r600_tex_swizzle(view->swizzle_a)) | S_038010_BASE_LEVEL(view->first_level); rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD5] = diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index ab20e97948..96173b0ed6 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -59,24 +59,22 @@ static unsigned long r600_texture_get_offset(struct r600_resource_texture *rtex, static void r600_setup_miptree(struct r600_screen *rscreen, struct r600_resource_texture *rtex) { struct pipe_resource *ptex = &rtex->resource.base.b; - unsigned long w, h, stride, size, layer_size, i, offset; + unsigned long w, h, pitch, size, layer_size, i, offset; + rtex->bpt = util_format_get_blocksize(ptex->format); for (i = 0, offset = 0; i <= ptex->last_level; i++) { w = u_minify(ptex->width0, i); h = u_minify(ptex->height0, i); - stride = align(util_format_get_stride(ptex->format, w), 32); - layer_size = stride * h; + pitch = util_format_get_stride(ptex->format, align(w, 64)); + layer_size = pitch * h; if (ptex->target == PIPE_TEXTURE_CUBE) size = layer_size * 6; else size = layer_size * u_minify(ptex->depth0, i); rtex->offset[i] = offset; rtex->layer_size[i] = layer_size; - rtex->pitch[i] = stride / util_format_get_blocksize(ptex->format); - rtex->pitch[i] += R600_TEXEL_PITCH_ALIGNMENT_MASK; - rtex->pitch[i] &= ~R600_TEXEL_PITCH_ALIGNMENT_MASK; - rtex->stride[i] = stride; - offset += align(size, 32); + rtex->pitch[i] = pitch; + offset += size; } rtex->size = offset; } @@ -183,11 +181,11 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, pipe_reference_init(&resource->base.b.reference, 1); resource->base.b.screen = screen; resource->bo = bo; - rtex->stride_override = whandle->stride; - rtex->pitch[0] = whandle->stride / util_format_get_blocksize(templ->format); - rtex->stride[0] = whandle->stride; + rtex->pitch_override = whandle->stride; + rtex->bpt = util_format_get_blocksize(templ->format); + rtex->pitch[0] = whandle->stride; rtex->offset[0] = 0; - rtex->size = align(rtex->stride[0] * templ->height0, 32); + rtex->size = align(rtex->pitch[0] * templ->height0, 64); return &resource->base.b; } @@ -216,7 +214,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, trans->transfer.sr = sr; trans->transfer.usage = usage; trans->transfer.box = *box; - trans->transfer.stride = rtex->stride[sr.level]; + trans->transfer.stride = rtex->pitch[sr.level]; trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face); return &trans->transfer; } diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 593b95c9c7..c1acfcd29e 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -316,6 +316,46 @@ #define S_028010_ZRANGE_PRECISION(x) (((x) & 0x1) << 31) #define G_028010_ZRANGE_PRECISION(x) (((x) >> 31) & 0x1) #define C_028010_ZRANGE_PRECISION 0x7FFFFFFF +#define R_028814_PA_SU_SC_MODE_CNTL 0x028814 +#define S_028814_CULL_FRONT(x) (((x) & 0x1) << 0) +#define G_028814_CULL_FRONT(x) (((x) >> 0) & 0x1) +#define C_028814_CULL_FRONT 0xFFFFFFFE +#define S_028814_CULL_BACK(x) (((x) & 0x1) << 1) +#define G_028814_CULL_BACK(x) (((x) >> 1) & 0x1) +#define C_028814_CULL_BACK 0xFFFFFFFD +#define S_028814_FACE(x) (((x) & 0x1) << 2) +#define G_028814_FACE(x) (((x) >> 2) & 0x1) +#define C_028814_FACE 0xFFFFFFFB +#define S_028814_POLY_MODE(x) (((x) & 0x3) << 3) +#define G_028814_POLY_MODE(x) (((x) >> 3) & 0x3) +#define C_028814_POLY_MODE 0xFFFFFFE7 +#define S_028814_POLYMODE_FRONT_PTYPE(x) (((x) & 0x7) << 5) +#define G_028814_POLYMODE_FRONT_PTYPE(x) (((x) >> 5) & 0x7) +#define C_028814_POLYMODE_FRONT_PTYPE 0xFFFFFF1F +#define S_028814_POLYMODE_BACK_PTYPE(x) (((x) & 0x7) << 8) +#define G_028814_POLYMODE_BACK_PTYPE(x) (((x) >> 8) & 0x7) +#define C_028814_POLYMODE_BACK_PTYPE 0xFFFFF8FF +#define S_028814_POLY_OFFSET_FRONT_ENABLE(x) (((x) & 0x1) << 11) +#define G_028814_POLY_OFFSET_FRONT_ENABLE(x) (((x) >> 11) & 0x1) +#define C_028814_POLY_OFFSET_FRONT_ENABLE 0xFFFFF7FF +#define S_028814_POLY_OFFSET_BACK_ENABLE(x) (((x) & 0x1) << 12) +#define G_028814_POLY_OFFSET_BACK_ENABLE(x) (((x) >> 12) & 0x1) +#define C_028814_POLY_OFFSET_BACK_ENABLE 0xFFFFEFFF +#define S_028814_POLY_OFFSET_PARA_ENABLE(x) (((x) & 0x1) << 13) +#define G_028814_POLY_OFFSET_PARA_ENABLE(x) (((x) >> 13) & 0x1) +#define C_028814_POLY_OFFSET_PARA_ENABLE 0xFFFFDFFF +#define S_028814_VTX_WINDOW_OFFSET_ENABLE(x) (((x) & 0x1) << 16) +#define G_028814_VTX_WINDOW_OFFSET_ENABLE(x) (((x) >> 16) & 0x1) +#define C_028814_VTX_WINDOW_OFFSET_ENABLE 0xFFFEFFFF +#define S_028814_PROVOKING_VTX_LAST(x) (((x) & 0x1) << 19) +#define G_028814_PROVOKING_VTX_LAST(x) (((x) >> 19) & 0x1) +#define C_028814_PROVOKING_VTX_LAST 0xFFF7FFFF +#define S_028814_PERSP_CORR_DIS(x) (((x) & 0x1) << 20) +#define G_028814_PERSP_CORR_DIS(x) (((x) >> 20) & 0x1) +#define C_028814_PERSP_CORR_DIS 0xFFEFFFFF +#define S_028814_MULTI_PRIM_IB_ENA(x) (((x) & 0x1) << 21) +#define G_028814_MULTI_PRIM_IB_ENA(x) (((x) >> 21) & 0x1) +#define C_028814_MULTI_PRIM_IB_ENA 0xFFDFFFFF #define R_028000_DB_DEPTH_SIZE 0x028000 #define S_028000_PITCH_TILE_MAX(x) (((x) & 0x3FF) << 0) #define G_028000_PITCH_TILE_MAX(x) (((x) >> 0) & 0x3FF) -- cgit v1.2.3 From d1f38baa01b8881232abfe79721296c0cf8c19a5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Jul 2010 13:26:42 -0600 Subject: llvmpipe: silence warnings in lp_test_sincos.c --- src/gallium/drivers/llvmpipe/lp_test_sincos.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_test_sincos.c b/src/gallium/drivers/llvmpipe/lp_test_sincos.c index e93c1b7859..7ab357f162 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_sincos.c +++ b/src/gallium/drivers/llvmpipe/lp_test_sincos.c @@ -32,6 +32,7 @@ #include "gallivm/lp_bld.h" #include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_arit.h" +#include "util/u_pointer.h" #include #include @@ -136,8 +137,8 @@ test_sincos(unsigned verbose, FILE *fp) (void)pass; #endif - sin_func = (test_sincos_t)LLVMGetPointerToGlobal(engine, test_sin); - cos_func = (test_sincos_t)LLVMGetPointerToGlobal(engine, test_cos); + sin_func = (test_sincos_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_sin)); + cos_func = (test_sincos_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_cos)); memset(unpacked, 0, sizeof unpacked); -- cgit v1.2.3 From ba2cc3b8e6ad161181b67fd2575c6bc768584d23 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Jul 2010 13:49:21 -0600 Subject: gallium: implement bounds checking for constant buffers Plumb the constant buffer sizes down into the tgsi interpreter where we can do bounds checking. Optional debug code warns upon out-of-bounds reading. Plus add a few other assertions in the TGSI interpreter. --- src/gallium/auxiliary/draw/draw_context.c | 11 ++++- src/gallium/auxiliary/draw/draw_gs.c | 13 ++++-- src/gallium/auxiliary/draw/draw_gs.h | 1 + src/gallium/auxiliary/draw/draw_private.h | 7 ++- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 4 ++ .../draw/draw_pt_fetch_shade_pipeline_llvm.c | 1 + src/gallium/auxiliary/draw/draw_vs.c | 16 ++++++- src/gallium/auxiliary/draw/draw_vs.h | 3 +- src/gallium/auxiliary/draw/draw_vs_exec.c | 8 ++-- src/gallium/auxiliary/draw/draw_vs_llvm.c | 1 + src/gallium/auxiliary/draw/draw_vs_varient.c | 6 ++- src/gallium/auxiliary/tgsi/tgsi_exec.c | 53 +++++++++++++++++++--- src/gallium/auxiliary/tgsi/tgsi_exec.h | 10 ++++ src/gallium/drivers/softpipe/sp_context.h | 1 + src/gallium/drivers/softpipe/sp_quad_fs.c | 7 +-- src/gallium/drivers/softpipe/sp_state_fs.c | 2 + 16 files changed, 117 insertions(+), 27 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index c127f74188..995b675b9a 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -288,12 +288,19 @@ draw_set_mapped_constant_buffer(struct draw_context *draw, shader_type == PIPE_SHADER_GEOMETRY); debug_assert(slot < PIPE_MAX_CONSTANT_BUFFERS); - if (shader_type == PIPE_SHADER_VERTEX) { + switch (shader_type) { + case PIPE_SHADER_VERTEX: draw->pt.user.vs_constants[slot] = buffer; + draw->pt.user.vs_constants_size[slot] = size; draw_vs_set_constants(draw, slot, buffer, size); - } else if (shader_type == PIPE_SHADER_GEOMETRY) { + break; + case PIPE_SHADER_GEOMETRY: draw->pt.user.gs_constants[slot] = buffer; + draw->pt.user.gs_constants_size[slot] = size; draw_gs_set_constants(draw, slot, buffer, size); + break; + default: + assert(0 && "invalid shader type in draw_set_mapped_constant_buffer"); } } diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index 0c590f936b..a36321d910 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -75,7 +75,7 @@ draw_gs_set_constants(struct draw_context *draw, const void *constants, unsigned size) { - /* noop */ + debug_printf("draw_gs_set_constants() not implemented yet!\n"); } @@ -394,8 +394,13 @@ static void gs_tri_adj(struct draw_geometry_shader *shader, const ushort *elts = input_prims->elts; #include "draw_gs_tmp.h" + +/** + * Execute geometry shader using TGSI interpreter. + */ int draw_geometry_shader_run(struct draw_geometry_shader *shader, const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS], const struct draw_vertex_info *input_verts, const struct draw_prim_info *input_prim, struct draw_vertex_info *output_verts, @@ -405,7 +410,6 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader, unsigned input_stride = input_verts->vertex_size; unsigned vertex_size = input_verts->vertex_size; struct tgsi_exec_machine *machine = shader->machine; - unsigned int i; unsigned num_input_verts = input_prim->linear ? input_verts->count : input_prim->count; @@ -447,9 +451,8 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader, } shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned)); - for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { - machine->Consts[i] = constants[i]; - } + tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS, + constants, constants_size); if (input_prim->linear) gs_run(shader, input_prim, input_verts, diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h index 06f4b822a2..67bc1aa73f 100644 --- a/src/gallium/auxiliary/draw/draw_gs.h +++ b/src/gallium/auxiliary/draw/draw_gs.h @@ -73,6 +73,7 @@ struct draw_geometry_shader { */ int draw_geometry_shader_run(struct draw_geometry_shader *shader, const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS], const struct draw_vertex_info *input_verts, const struct draw_prim_info *input_prim, struct draw_vertex_info *output_verts, diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 058aeedc17..397d4bf653 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -163,9 +163,11 @@ struct draw_context /** vertex arrays */ const void *vbuffer[PIPE_MAX_ATTRIBS]; - /** constant buffer (for vertex/geometry shader) */ + /** constant buffers (for vertex/geometry shader) */ const void *vs_constants[PIPE_MAX_CONSTANT_BUFFERS]; + unsigned vs_constants_size[PIPE_MAX_CONSTANT_BUFFERS]; const void *gs_constants[PIPE_MAX_CONSTANT_BUFFERS]; + unsigned gs_constants_size[PIPE_MAX_CONSTANT_BUFFERS]; } user; boolean test_fse; /* enable FSE even though its not correct (eg for softpipe) */ @@ -198,6 +200,7 @@ struct draw_context struct pipe_viewport_state viewport; boolean identity_viewport; + /** Vertex shader state */ struct { struct draw_vertex_shader *vertex_shader; uint num_vs_outputs; /**< convenience, from vertex_shader */ @@ -227,6 +230,7 @@ struct draw_context struct translate_cache *emit_cache; } vs; + /** Geometry shader state */ struct { struct draw_geometry_shader *geometry_shader; uint num_gs_outputs; /**< convenience, from geometry_shader */ @@ -239,6 +243,7 @@ struct draw_context struct tgsi_sampler **samplers; } gs; + /** Stream output (vertex feedback) state */ struct { struct pipe_stream_output_state state; void *buffers[PIPE_MAX_SO_BUFFERS]; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 121dfc414a..5b16c3788e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -176,6 +176,7 @@ static void emit(struct pt_emit *emit, static void draw_vertex_shader_run(struct draw_vertex_shader *vshader, const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS], const struct draw_vertex_info *input_verts, struct draw_vertex_info *output_verts ) { @@ -190,6 +191,7 @@ static void draw_vertex_shader_run(struct draw_vertex_shader *vshader, (const float (*)[4])input_verts->verts->data, ( float (*)[4])output_verts->verts->data, constants, + const_size, input_verts->count, input_verts->vertex_size, input_verts->vertex_size); @@ -236,6 +238,7 @@ static void fetch_pipeline_generic( struct draw_pt_middle_end *middle, if (fpme->opt & PT_SHADE) { draw_vertex_shader_run(vshader, draw->pt.user.vs_constants, + draw->pt.user.vs_constants_size, vert_info, &vs_vert_info); @@ -246,6 +249,7 @@ static void fetch_pipeline_generic( struct draw_pt_middle_end *middle, if ((fpme->opt & PT_SHADE) && gshader) { draw_geometry_shader_run(gshader, draw->pt.user.gs_constants, + draw->pt.user.gs_constants_size, vert_info, prim_info, &gs_vert_info, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index 5c9db12086..4b99bee86a 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -254,6 +254,7 @@ llvm_pipeline_generic( struct draw_pt_middle_end *middle, if ((opt & PT_SHADE) && gshader) { draw_geometry_shader_run(gshader, draw->pt.user.gs_constants, + draw->pt.user.gs_constants_size, vert_info, prim_info, &gs_vert_info, diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 57ea63fc06..fb665b08ff 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -48,18 +48,30 @@ DEBUG_GET_ONCE_BOOL_OPTION(gallium_dump_vs, "GALLIUM_DUMP_VS", FALSE) + +/** + * Set a vertex shader constant buffer. + * \param slot which constant buffer in [0, PIPE_MAX_CONSTANT_BUFFERS-1] + * \param constants the mapped buffer + * \param size size of buffer in bytes + */ void draw_vs_set_constants(struct draw_context *draw, unsigned slot, const void *constants, unsigned size) { - if (((uintptr_t)constants) & 0xf) { + const int alignment = 16; + + /* check if buffer is 16-byte aligned */ + if (((uintptr_t)constants) & (alignment - 1)) { + /* if not, copy the constants into a new, 16-byte aligned buffer */ if (size > draw->vs.const_storage_size[slot]) { if (draw->vs.aligned_constant_storage[slot]) { align_free((void *)draw->vs.aligned_constant_storage[slot]); } - draw->vs.aligned_constant_storage[slot] = align_malloc(size, 16); + draw->vs.aligned_constant_storage[slot] = + align_malloc(size, alignment); } assert(constants); memcpy((void *)draw->vs.aligned_constant_storage[slot], diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index a731994523..f9a038788f 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -133,7 +133,8 @@ struct draw_vertex_shader { void (*run_linear)( struct draw_vertex_shader *shader, const float (*input)[4], float (*output)[4], - const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride ); diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index bc34d390da..dab3eb1ca8 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -85,7 +85,8 @@ static void vs_exec_run_linear( struct draw_vertex_shader *shader, const float (*input)[4], float (*output)[4], - const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride ) @@ -95,9 +96,8 @@ vs_exec_run_linear( struct draw_vertex_shader *shader, unsigned int i, j; unsigned slot; - for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { - machine->Consts[i] = constants[i]; - } + tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS, + constants, const_size); for (i = 0; i < count; i += MAX_TGSI_VERTICES) { unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 6c13df7913..d13ad24fff 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -49,6 +49,7 @@ vs_llvm_run_linear( struct draw_vertex_shader *shader, const float (*input)[4], float (*output)[4], const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride ) diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 6eb26927f2..eacd160187 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -149,7 +149,8 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, vsvg->base.vs->run_linear( vsvg->base.vs, temp_buffer, temp_buffer, - vsvg->base.vs->draw->pt.user.vs_constants, + vsvg->base.vs->draw->pt.user.vs_constants, + vsvg->base.vs->draw->pt.user.vs_constants_size, count, temp_vertex_stride, temp_vertex_stride); @@ -214,7 +215,8 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, vsvg->base.vs->run_linear( vsvg->base.vs, temp_buffer, temp_buffer, - vsvg->base.vs->draw->pt.user.vs_constants, + vsvg->base.vs->draw->pt.user.vs_constants, + vsvg->base.vs->draw->pt.user.vs_constants_size, count, temp_vertex_stride, temp_vertex_stride); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 5275faa5e2..6fcbf4f212 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -557,6 +557,23 @@ print_temp(const struct tgsi_exec_machine *mach, uint index) #endif +void +tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, + unsigned num_bufs, + const void **bufs, + const unsigned *buf_sizes) +{ + unsigned i; + + for (i = 0; i < num_bufs; i++) { + mach->Consts[i] = bufs[i]; + mach->ConstsSize[i] = buf_sizes[i]; + } +} + + + + /** * Check if there's a potential src/dst register data dependency when * using SOA execution. @@ -632,6 +649,11 @@ tgsi_exec_machine_bind_shader( util_init_math(); + if (numSamplers) { + assert(samplers); + assert(samplers[0]); + } + mach->Tokens = tokens; mach->Samplers = samplers; @@ -1040,6 +1062,8 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, { uint i; + assert(swizzle < 4); + switch (file) { case TGSI_FILE_CONSTANT: for (i = 0; i < QUAD_SIZE; i++) { @@ -1049,9 +1073,23 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, if (index->i[i] < 0) { chan->u[i] = 0; } else { - const uint *p = (const uint *)mach->Consts[index2D->i[i]]; - - chan->u[i] = p[index->i[i] * 4 + swizzle]; + /* NOTE: copying the const value as a uint instead of float */ + const uint constbuf = index2D->i[i]; + const uint *buf = (const uint *)mach->Consts[constbuf]; + const int pos = index->i[i] * 4 + swizzle; + /* const buffer bounds check */ + if (pos < 0 || pos >= mach->ConstsSize[constbuf]) { + if (0) { + /* Debug: print warning */ + static int count = 0; + if (count++ < 100) + debug_printf("TGSI Exec: const buffer index %d" + " out of bounds\n", pos); + } + chan->u[i] = 0; + } + else + chan->u[i] = buf[pos]; } } break; @@ -1065,9 +1103,10 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i], index2D->i[i], index->i[i]); }*/ - chan->u[i] = mach->Inputs[index2D->i[i] * - TGSI_EXEC_MAX_INPUT_ATTRIBS + - index->i[i]].xyzw[swizzle].u[i]; + int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]; + assert(pos >= 0); + assert(pos < Elements(mach->Inputs)); + chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i]; } break; @@ -1187,7 +1226,7 @@ fetch_source(const struct tgsi_exec_machine *mach, index2.i[1] = index2.i[2] = index2.i[3] = reg->Indirect.Index; - + assert(reg->Indirect.File == TGSI_FILE_ADDRESS); /* get current value of address register[swizzle] */ swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); fetch_src_file_channel(mach, diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index ccf80ca6fd..6dee362d58 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -253,7 +253,10 @@ struct tgsi_exec_machine struct tgsi_sampler **Samplers; unsigned ImmLimit; + const void *Consts[PIPE_MAX_CONSTANT_BUFFERS]; + unsigned ConstsSize[PIPE_MAX_CONSTANT_BUFFERS]; + const struct tgsi_token *Tokens; /**< Declarations, instructions */ unsigned Processor; /**< TGSI_PROCESSOR_x */ @@ -367,6 +370,13 @@ tgsi_set_exec_mask(struct tgsi_exec_machine *mach, } +extern void +tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, + unsigned num_bufs, + const void **bufs, + const unsigned *buf_sizes); + + #if defined __cplusplus } /* extern "C" */ #endif diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index c5f53cfa61..9361a3df09 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -112,6 +112,7 @@ struct softpipe_context { /** Mapped constant buffers */ const void *mapped_constants[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS]; + unsigned const_buffer_size[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS]; /** Vertex format */ struct vertex_info vertex_info; diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index d240bcbf3b..90f4787d59 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -111,9 +111,10 @@ shade_quads(struct quad_stage *qs, struct tgsi_exec_machine *machine = softpipe->fs_machine; unsigned i, nr_quads = 0; - for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { - machine->Consts[i] = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT][i]; - } + tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS, + softpipe->mapped_constants[PIPE_SHADER_FRAGMENT], + softpipe->const_buffer_size[PIPE_SHADER_FRAGMENT]); + machine->InterpCoefs = quads[0]->coef; for (i = 0; i < nr; i++) { diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 3fbf1f2578..ded242d3dc 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -195,6 +195,8 @@ softpipe_set_constant_buffer(struct pipe_context *pipe, } softpipe->mapped_constants[shader][index] = data; + softpipe->const_buffer_size[shader][index] = size; + softpipe->dirty |= SP_NEW_CONSTANTS; } -- cgit v1.2.3 From 6c1625cc405f0d77523c122cedf3e8003f2aa7bf Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Jul 2010 17:31:17 -0600 Subject: llvmpipe: added some jit debug code If we crash in the jitted function we can examine jit_line and jit_state in gdb to learn more about the shader. --- src/gallium/drivers/llvmpipe/lp_rast.c | 10 ++++++++++ src/gallium/drivers/llvmpipe/lp_rast_priv.h | 30 +++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index ba7b48328b..3215d0f652 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -43,6 +43,12 @@ #include "lp_scene.h" +#ifdef DEBUG +int jit_line = 0; +const struct lp_rast_state *jit_state = NULL; +#endif + + /** * Begin rasterizing a scene. * Called once per scene by one thread. @@ -419,6 +425,7 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, depth = lp_rast_get_depth_block_pointer(task, tile_x + x, tile_y + y); /* run shader on 4x4 block */ + BEGIN_JIT_CALL(state); variant->jit_function[RAST_WHOLE]( &state->jit_context, tile_x + x, tile_y + y, inputs->facing, @@ -429,6 +436,7 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, depth, 0xffff, &task->vis_counter); + END_JIT_CALL(); } } } @@ -498,6 +506,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, assert(lp_check_alignment(state->jit_context.blend_color, 16)); /* run shader on 4x4 block */ + BEGIN_JIT_CALL(state); variant->jit_function[RAST_EDGE_TEST](&state->jit_context, x, y, inputs->facing, @@ -508,6 +517,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, depth, mask, &task->vis_counter); + END_JIT_CALL(); } diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index b4a48cfd02..760ab3db1f 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -40,6 +40,34 @@ #include "lp_limits.h" +/* If we crash in a jitted function, we can examine jit_line and jit_state + * to get some info. This is not thread-safe, however. + */ +#ifdef DEBUG + +extern int jit_line; +extern const struct lp_rast_state *jit_state; + +#define BEGIN_JIT_CALL(state) \ + do { \ + jit_line = __LINE__; \ + jit_state = state; \ + } while (0) + +#define END_JIT_CALL() \ + do { \ + jit_line = 0; \ + jit_state = NULL; \ + } while (0) + +#else + +#define BEGIN_JIT_CALL(X) +#define END_JIT_CALL + +#endif + + struct lp_rasterizer; @@ -249,6 +277,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, depth = lp_rast_get_depth_block_pointer(task, x, y); /* run shader on 4x4 block */ + BEGIN_JIT_CALL(state); variant->jit_function[RAST_WHOLE]( &state->jit_context, x, y, inputs->facing, @@ -259,6 +288,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, depth, 0xffff, &task->vis_counter ); + END_JIT_CALL(); } -- cgit v1.2.3 From e75da1a8d668aa2a245a7c2690b42ae7a6038f48 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Jul 2010 17:49:49 -0600 Subject: llvmpipe: fix on-debug build breakage --- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 760ab3db1f..fae7f6d3dc 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -63,7 +63,7 @@ extern const struct lp_rast_state *jit_state; #else #define BEGIN_JIT_CALL(X) -#define END_JIT_CALL +#define END_JIT_CALL() #endif -- cgit v1.2.3 From 6c288d06ac512be6eb7f19a9005389dd46d5a26a Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 30 Jul 2010 11:10:24 -0400 Subject: r600g: fix typo in tex instruction + shader semantic id fix It seems we never get semantic id from TGSI so fallback to use output number as id. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_shader.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index c61cc11e88..dd0e039bf6 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -138,12 +138,10 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta for (i = 0; i < 10; i++) { state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0; } - for (i = 0, j = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name != TGSI_SEMANTIC_POSITION) { - tmp = rshader->output[i].sid << ((j & 3) * 8); - state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + j / 4] |= tmp; - j++; - } + /* so far never got proper semantic id from tgsi */ + for (i = 0; i < 32; i++) { + tmp = i << ((i & 3) * 8); + state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp; } state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr); @@ -167,7 +165,7 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta if (state == NULL) return -ENOMEM; for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(rshader->input[i].sid); + tmp = S_028644_SEMANTIC(i); tmp |= S_028644_SEL_CENTROID(1); if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) { @@ -525,6 +523,7 @@ static int tgsi_op2(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; + alu.dst.chan = i; } else { alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { @@ -567,6 +566,7 @@ static int tgsi_slt(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; + alu.dst.chan = i; } else { alu.inst = ctx->inst_info->r600_opcode; r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]); @@ -747,6 +747,7 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru memset(&alu, 0, sizeof(struct r600_bc_alu)); if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; + alu.dst.chan = i; } else { alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -918,7 +919,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; tex.sampler_id = tex.resource_id; tex.src_gpr = src_gpr; - tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Src[0].Register.Index; + tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; tex.dst_sel_x = 0; tex.dst_sel_y = 1; tex.dst_sel_z = 2; -- cgit v1.2.3 From 042018a943a403a4d9887b400deb3b3c83ee40c0 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 27 Jul 2010 12:26:54 -0400 Subject: llvmpipe: delete function bodies after generating machine code --- src/gallium/auxiliary/draw/draw_llvm.c | 2 ++ src/gallium/auxiliary/gallivm/lp_bld_init.h | 2 ++ src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 8 ++++++++ src/gallium/drivers/llvmpipe/lp_state_fs.c | 1 + 4 files changed, 13 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 48489e5f6f..8022b720b3 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -826,6 +826,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) if (gallivm_debug & GALLIVM_DEBUG_ASM) { lp_disassemble(code); } + lp_func_delete_body(variant->function); } @@ -1001,6 +1002,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian if (gallivm_debug & GALLIVM_DEBUG_ASM) { lp_disassemble(code); } + lp_func_delete_body(variant->function_elts); } void diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h index a32ced9b4c..f26fdac466 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h @@ -44,5 +44,7 @@ extern LLVMPassManagerRef lp_build_pass; void lp_build_init(void); +extern void +lp_func_delete_body(LLVMValueRef func); #endif /* !LP_BLD_INIT_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 072408b268..6d5410d970 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -166,3 +166,11 @@ lp_set_target_options(void) } #endif } + + +extern "C" void +lp_func_delete_body(LLVMValueRef FF) +{ + llvm::Function *func = llvm::unwrap(FF); + func->deleteBody(); +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index dbcc286417..5ee5bde184 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -715,6 +715,7 @@ generate_fragment(struct llvmpipe_context *lp, if (gallivm_debug & GALLIVM_DEBUG_ASM) { lp_disassemble(f); } + lp_func_delete_body(function); } } -- cgit v1.2.3 From 078eff659a7ef90691966d983f35ed9e4ce63901 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Fri, 30 Jul 2010 11:48:48 -0700 Subject: llvmpipe: Fix implicit declaration of lp_func_delete_body warnings. --- src/gallium/auxiliary/draw/draw_llvm.c | 1 + src/gallium/drivers/llvmpipe/lp_state_fs.c | 1 + 2 files changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 8022b720b3..de99b00a81 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -38,6 +38,7 @@ #include "gallivm/lp_bld_tgsi.h" #include "gallivm/lp_bld_printf.h" #include "gallivm/lp_bld_intr.h" +#include "gallivm/lp_bld_init.h" #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_dump.h" diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 5ee5bde184..dbca49a2ef 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -75,6 +75,7 @@ #include "gallivm/lp_bld_type.h" #include "gallivm/lp_bld_const.h" #include "gallivm/lp_bld_conv.h" +#include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_intr.h" #include "gallivm/lp_bld_logic.h" #include "gallivm/lp_bld_tgsi.h" -- cgit v1.2.3 From 79ab5b9798c911e7612e26616df82e98372549bf Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Fri, 30 Jul 2010 13:11:14 -0700 Subject: llvmpipe: Silence unused value warning. --- src/gallium/drivers/llvmpipe/lp_test_printf.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_test_printf.c b/src/gallium/drivers/llvmpipe/lp_test_printf.c index a3447bf53f..4653f30e39 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_printf.c +++ b/src/gallium/drivers/llvmpipe/lp_test_printf.c @@ -120,6 +120,7 @@ test_printf(unsigned verbose, FILE *fp, const struct printf_test_case *testcase) abort(); } #else + (void) provider; engine = lp_build_engine; #endif -- cgit v1.2.3 From 094d66f45992830929d620782c70836b4b9b4a37 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 30 Jul 2010 13:58:46 -0400 Subject: r600g: add KIL opcode support Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_shader.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index dd0e039bf6..8159cefd94 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -556,6 +556,30 @@ static int tgsi_op2(struct r600_shader_ctx *ctx) return 0; } +static int tgsi_kill(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, r; + + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = ctx->inst_info->r600_opcode; + alu.dst.chan = i; + alu.src[0].sel = 248; + r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]); + if (r) + return r; + if (i == 3) { + alu.last = 1; + } + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + static int tgsi_slt(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -1139,7 +1163,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* conditional kill */ + {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ /* gap */ {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, -- cgit v1.2.3 From 5fa1af0905ef2f513fe296f1d8e658dbdf24dd74 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Sat, 31 Jul 2010 14:56:53 -0700 Subject: r600g: Remove unnecessary header. --- src/gallium/drivers/r600/r600_texture.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 96173b0ed6..3725bf8560 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -33,7 +33,6 @@ #include "r600_screen.h" #include "r600_context.h" #include "r600_resource.h" -#include "r600d.h" extern struct u_resource_vtbl r600_texture_vtbl; -- cgit v1.2.3 From 1d74a1674c0675dc59588167741c4abd2cc82cf9 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Sat, 31 Jul 2010 19:23:14 -0700 Subject: r600g: Remove unnecessary header. --- src/gallium/drivers/r600/r700_asm.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c index 3532ba5b0c..1ebe20d6ab 100644 --- a/src/gallium/drivers/r600/r700_asm.c +++ b/src/gallium/drivers/r600/r700_asm.c @@ -25,7 +25,6 @@ #include "util/u_memory.h" #include "r700_sq.h" #include -#include int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) { -- cgit v1.2.3 From a87cedbde0f6f1335691c524d539552f5c056215 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 1 Aug 2010 16:42:53 +0200 Subject: r300g: fix microtiling on RS6xx Getting tiling right has always been tricky. There are so many subtle details... --- src/gallium/drivers/r300/r300_texture_desc.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index 343089bf2c..5d690e8c33 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -136,13 +136,27 @@ static unsigned r300_texture_get_stride(struct r300_screen *screen, stride = util_format_get_stride(desc->b.b.format, width); - /* Some IGPs need a minimum stride of 64 bytes, hmm... - * This doesn't seem to apply to tiled textures, according to r300c. */ - if (!desc->microtile && !desc->macrotile[level] && + /* Some IGPs need a minimum stride of 64 bytes, hmm... */ + if (!desc->macrotile[level] && (screen->caps.family == CHIP_FAMILY_RS600 || screen->caps.family == CHIP_FAMILY_RS690 || screen->caps.family == CHIP_FAMILY_RS740)) { - return stride < 64 ? 64 : stride; + unsigned min_stride; + + if (desc->microtile) { + unsigned tile_height = + r300_get_pixel_alignment(desc->b.b.format, + desc->b.b.nr_samples, + desc->microtile, + desc->macrotile[level], + DIM_HEIGHT); + + min_stride = 64 / tile_height; + } else { + min_stride = 64; + } + + return stride < min_stride ? min_stride : stride; } /* The alignment to 32 bytes is sort of implied by the layout... */ -- cgit v1.2.3 From 919750a55771cfd731e2f90d1385cbb4265e4cf4 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 2 Aug 2010 14:31:49 +1000 Subject: r600g: make r600_db_format static. this isn't used anywhere else yet. --- src/gallium/drivers/r600/r600_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 0191070daa..b27d9d5557 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -667,7 +667,7 @@ static struct radeon_state *r600_cb0(struct r600_context *rctx) return rstate; } -int r600_db_format(unsigned pformat, unsigned *format) +static int r600_db_format(unsigned pformat, unsigned *format) { switch (pformat) { case PIPE_FORMAT_Z24X8_UNORM: -- cgit v1.2.3 From 01984cf34fa4e6d564d06a126795468b5718ecb0 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 2 Aug 2010 14:53:31 +1000 Subject: r600g: set correct tex coord type for rect textures. --- src/gallium/drivers/r600/r600_shader.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 8159cefd94..0582839905 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -952,10 +952,12 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.src_sel_y = 1; tex.src_sel_z = 2; tex.src_sel_w = 3; - tex.coord_type_x = 1; - tex.coord_type_y = 1; - tex.coord_type_z = 1; - tex.coord_type_w = 1; + if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { + tex.coord_type_x = 1; + tex.coord_type_y = 1; + tex.coord_type_z = 1; + tex.coord_type_w = 1; + } return r600_bc_add_tex(ctx->bc, &tex); } -- cgit v1.2.3 From 7bcd39ce50b7b710bb8561c430f345ebe91ab9a3 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 2 Aug 2010 14:42:29 +1000 Subject: r600g: add initial blend state. migrates cb_cntl to be regenerated --- src/gallium/drivers/r600/r600_context.c | 14 ---- src/gallium/drivers/r600/r600_context.h | 1 + src/gallium/drivers/r600/r600_reg.h | 72 +++++++++++++++++++ src/gallium/drivers/r600/r600_state.c | 95 ++++++++++++++++++++++-- src/gallium/drivers/r600/r600_state_inlines.h | 100 ++++++++++++++++++++++++++ 5 files changed, 262 insertions(+), 20 deletions(-) create mode 100644 src/gallium/drivers/r600/r600_reg.h create mode 100644 src/gallium/drivers/r600/r600_state_inlines.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index fc8aa1b866..8b191914f5 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -331,20 +331,6 @@ struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) return NULL; } - rctx->hw_states.cb_cntl = radeon_state(rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); - rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_SHADER_MASK] = 0x0000000F; - rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_TARGET_MASK] = 0x0000000F; - rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_COLOR_CONTROL] = 0x00CC0000; - rctx->hw_states.cb_cntl->states[R600_CB_CNTL__PA_SC_AA_CONFIG] = 0x00000000; - rctx->hw_states.cb_cntl->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX] = 0x00000000; - rctx->hw_states.cb_cntl->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX] = 0x00000000; - rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_CONTROL] = 0x01000000; - rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_SRC] = 0x00000000; - rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_DST] = 0x000000FF; - rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_MSK] = 0xFFFFFFFF; - rctx->hw_states.cb_cntl->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF; - radeon_state_pm4(rctx->hw_states.cb_cntl); - r600_init_config(rctx); rctx->ctx = radeon_ctx(rscreen->rw); diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index 97c8a46bb0..a1ee9577ba 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -158,6 +158,7 @@ struct r600_context { struct r600_vertex_element *vertex_elements; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; struct pipe_index_buffer index_buffer; + struct pipe_blend_color blend_color; }; #if 0 diff --git a/src/gallium/drivers/r600/r600_reg.h b/src/gallium/drivers/r600/r600_reg.h new file mode 100644 index 0000000000..2600875d5c --- /dev/null +++ b/src/gallium/drivers/r600/r600_reg.h @@ -0,0 +1,72 @@ +#ifndef R600_REG_H +#define R600_REG_H + +/* for regs which haven't been generated yet */ + +#define R600_BLEND_ZERO 0 +#define R600_BLEND_ONE 1 +#define R600_BLEND_SRC_COLOR 2 +#define R600_BLEND_ONE_MINUS_SRC_COLOR 3 +#define R600_BLEND_SRC_ALPHA 4 +#define R600_BLEND_ONE_MINUS_SRC_ALPHA 5 +#define R600_BLEND_DST_ALPHA 6 +#define R600_BLEND_ONE_MINUS_DST_ALPHA 7 +#define R600_BLEND_DST_COLOR 8 +#define R600_BLEND_ONE_MINUS_DST_COLOR 9 +#define R600_BLEND_SRC_ALPHA_SATURATE 10 +#define R600_BLEND_BOTH_SRC_ALPHA 11 +#define R600_BLEND_BOTH_INV_SRC_ALPHA 12 +#define R600_BLEND_CONST_COLOR 13 +#define R600_BLEND_ONE_MINUS_CONST_COLOR 14 +#define R600_BLEND_SRC1_COLOR 15 +#define R600_BLEND_INV_SRC1_COLOR 16 +#define R600_BLEND_SRC1_ALPHA 17 +#define R600_BLEND_INV_SRC1_ALPHA 18 +#define R600_BLEND_CONST_ALPHA 19 +#define R600_BLEND_ONE_MINUS_CONST_ALPHA 20 + +#define R600_BLEND_FCN_ADD 0 +#define R600_BLEND_FCN_SUBTRACT 1 +#define R600_BLEND_FCN_MIN 2 +#define R600_BLEND_FCN_MAX 3 +#define R600_BLEND_FCN_RSUB 4 + +#define CB_BLEND_COLOR_SRCBLEND_SHIFT 0 +#define CB_BLEND_COLOR_COMB_FCN_SHIFT 5 +#define CB_BLEND_COLOR_DESTBLEND_SHIFT 8 +#define CB_BLEND_ALPHA_SRCBLEND_SHIFT 16 +#define CB_BLEND_ALPHA_COMB_FCN_SHIFT 21 +#define CB_BLEND_ALPHA_DESTBLEND_SHIFT 24 +#define CB_BLEND_SEPARATE_ALPHA_BLEND (1 << 29) + +#define SX_ALPHA_TEST_FUNC_SHIFT (0) +#define SX_ALPHA_TEST_ENABLE (1 << 3) + +#define R600_ZS_KEEP 0 +#define R600_ZS_ZERO 1 +#define R600_ZS_REPLACE 2 +#define R600_ZS_INCR 3 +#define R600_ZS_DECR 4 +#define R600_ZS_INVERT 5 +#define R600_ZS_INCR_WRAP 6 +#define R600_ZS_DECR_WRAP 7 + +#define R600_STENCILREF_SHIFT 0 +#define R600_STENCILMASK_SHIFT 8 +#define R600_STENCILWRITEMASK_SHIFT 16 + +#define PA_SU_PS_WIDTH_SHIFT 16 + +#define PA_SU_CULL_FRONT (1 << 0) +#define PA_SU_CULL_BACK (1 << 1) +#define PA_SU_FACE_CCW (0 << 2) +#define PA_SU_FACE_CW (1 << 2) + +#define PA_SU_POLYMODE_FRONT_SHIFT 5 +#define PA_SU_POLYMODE_BACK_SHIFT 5 +#define POLYGON_MODE_POINT 0 +#define POLYGON_MODE_LINE 1 +#define POLYGON_MODE_TRI 2 + + +#endif diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index b27d9d5557..56304cc69f 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -32,6 +32,8 @@ #include "r600_context.h" #include "r600_resource.h" #include "r600d.h" +#include "r600_reg.h" +#include "r600_state_inlines.h" static void *r600_create_blend_state(struct pipe_context *ctx, const struct pipe_blend_state *state) @@ -259,6 +261,9 @@ static void r600_delete_state(struct pipe_context *ctx, void *state) static void r600_set_blend_color(struct pipe_context *ctx, const struct pipe_blend_color *color) { + struct r600_context *rctx = r600_context(ctx); + + rctx->blend_color = *color; } static void r600_set_clip_state(struct pipe_context *ctx, @@ -604,15 +609,17 @@ static struct radeon_state *r600_blend(struct r600_context *rctx) { struct r600_screen *rscreen = rctx->screen; struct radeon_state *rstate; + const struct pipe_blend_state *state = &rctx->blend->state.blend; + int i; rstate = radeon_state(rscreen->rw, R600_BLEND_TYPE, R600_BLEND); if (rstate == NULL) return NULL; - rstate->states[R600_BLEND__CB_BLEND_RED] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND_GREEN] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND_BLUE] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND_ALPHA] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND0_CONTROL] = 0x00010001; + rstate->states[R600_BLEND__CB_BLEND_RED] = fui(rctx->blend_color.color[0]); + rstate->states[R600_BLEND__CB_BLEND_GREEN] = fui(rctx->blend_color.color[1]); + rstate->states[R600_BLEND__CB_BLEND_BLUE] = fui(rctx->blend_color.color[2]); + rstate->states[R600_BLEND__CB_BLEND_ALPHA] = fui(rctx->blend_color.color[3]); + rstate->states[R600_BLEND__CB_BLEND0_CONTROL] = 0x00000000; rstate->states[R600_BLEND__CB_BLEND1_CONTROL] = 0x00000000; rstate->states[R600_BLEND__CB_BLEND2_CONTROL] = 0x00000000; rstate->states[R600_BLEND__CB_BLEND3_CONTROL] = 0x00000000; @@ -621,6 +628,37 @@ static struct radeon_state *r600_blend(struct r600_context *rctx) rstate->states[R600_BLEND__CB_BLEND6_CONTROL] = 0x00000000; rstate->states[R600_BLEND__CB_BLEND7_CONTROL] = 0x00000000; rstate->states[R600_BLEND__CB_BLEND_CONTROL] = 0x00000000; + + for (i = 0; i < 8; i++) { + + unsigned eqRGB = state->rt[i].rgb_func; + unsigned srcRGB = state->rt[i].rgb_src_factor; + unsigned dstRGB = state->rt[i].rgb_dst_factor; + + unsigned eqA = state->rt[i].alpha_func; + unsigned srcA = state->rt[i].alpha_src_factor; + unsigned dstA = state->rt[i].alpha_dst_factor; + uint32_t bc = 0; + + if (!state->rt[i].blend_enable) + continue; + + bc |= r600_translate_blend_function(eqRGB) << CB_BLEND_COLOR_COMB_FCN_SHIFT; + bc |= r600_translate_blend_factor(srcRGB) << CB_BLEND_COLOR_SRCBLEND_SHIFT; + bc |= r600_translate_blend_factor(dstRGB) << CB_BLEND_COLOR_DESTBLEND_SHIFT; + + if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { + bc |= CB_BLEND_SEPARATE_ALPHA_BLEND; + bc |= r600_translate_blend_function(eqA) << CB_BLEND_ALPHA_COMB_FCN_SHIFT; + bc |= r600_translate_blend_factor(srcA) << CB_BLEND_ALPHA_SRCBLEND_SHIFT; + bc |= r600_translate_blend_factor(dstA) << CB_BLEND_ALPHA_DESTBLEND_SHIFT; + } + + rstate->states[R600_BLEND__CB_BLEND0_CONTROL + i] = bc; + if (i == 0) + rstate->states[R600_BLEND__CB_BLEND_CONTROL] = bc; + } + if (radeon_state_pm4(rstate)) { radeon_state_decref(rstate); return NULL; @@ -1084,6 +1122,49 @@ static struct radeon_state *r600_resource(struct r600_context *rctx, return rstate; } +static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) +{ + struct r600_screen *rscreen = rctx->screen; + struct radeon_state *rstate; + const struct pipe_blend_state *pbs = &rctx->blend->state.blend; + uint32_t color_control, target_mask; + int i; + + target_mask = 0; + color_control = 0; + + if (pbs->logicop_enable) { + color_control |= (pbs->logicop_func) << 16; + } else + color_control |= (0xcc << 16); + + target_mask |= (pbs->rt[0].colormask); + for (i = 0; i < 8; i++) { + if (pbs->rt[i].blend_enable) { + color_control |= (1 << (8 + i)); + target_mask |= (pbs->rt[0].colormask << (4 * i)); + } else if (i == 0) + target_mask |= 0xf; + } + rstate = radeon_state(rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); + rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = 0x0000000F; + rstate->states[R600_CB_CNTL__CB_TARGET_MASK] = target_mask; + rstate->states[R600_CB_CNTL__CB_COLOR_CONTROL] = color_control; + rstate->states[R600_CB_CNTL__PA_SC_AA_CONFIG] = 0x00000000; + rstate->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX] = 0x00000000; + rstate->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX] = 0x00000000; + rstate->states[R600_CB_CNTL__CB_CLRCMP_CONTROL] = 0x01000000; + rstate->states[R600_CB_CNTL__CB_CLRCMP_SRC] = 0x00000000; + rstate->states[R600_CB_CNTL__CB_CLRCMP_DST] = 0x000000FF; + rstate->states[R600_CB_CNTL__CB_CLRCMP_MSK] = 0xFFFFFFFF; + rstate->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF; + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; +} + int r600_context_hw_states(struct r600_context *rctx) { unsigned i; @@ -1093,7 +1174,7 @@ int r600_context_hw_states(struct r600_context *rctx) * doesn't */ //radeon_state_decref(rctx->hw_states.config); - //radeon_state_decref(rctx->hw_states.cb_cntl); + radeon_state_decref(rctx->hw_states.cb_cntl); radeon_state_decref(rctx->hw_states.db); radeon_state_decref(rctx->hw_states.rasterizer); radeon_state_decref(rctx->hw_states.scissor); @@ -1120,6 +1201,8 @@ int r600_context_hw_states(struct r600_context *rctx) rctx->hw_states.viewport = r600_viewport(rctx); rctx->hw_states.cb0 = r600_cb0(rctx); rctx->hw_states.db = r600_db(rctx); + rctx->hw_states.cb_cntl = r600_cb_cntl(rctx); + for (i = 0; i < rctx->ps_nsampler; i++) { if (rctx->ps_sampler[i]) { rctx->hw_states.ps_sampler[i] = r600_sampler(rctx, diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h new file mode 100644 index 0000000000..42bab52b3f --- /dev/null +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -0,0 +1,100 @@ +/* + * Copyright 2010 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R600_STATE_INLINES_H +#define R600_STATE_INLINES_H + +#include "r600_reg.h" + +static INLINE uint32_t r600_translate_blend_function(int blend_func) +{ + switch (blend_func) { + case PIPE_BLEND_ADD: + return R600_BLEND_FCN_ADD; + case PIPE_BLEND_SUBTRACT: + return R600_BLEND_FCN_SUBTRACT; + case PIPE_BLEND_REVERSE_SUBTRACT: + return R600_BLEND_FCN_RSUB; + case PIPE_BLEND_MIN: + return R600_BLEND_FCN_MIN; + case PIPE_BLEND_MAX: + return R600_BLEND_FCN_MAX; + default: + fprintf(stderr, "r600: Unknown blend function %d\n", blend_func); + assert(0); + break; + } + return 0; +} + +static INLINE uint32_t r600_translate_blend_factor(int blend_fact) +{ + switch (blend_fact) { + case PIPE_BLENDFACTOR_ONE: + return R600_BLEND_ZERO; + case PIPE_BLENDFACTOR_SRC_COLOR: + return R600_BLEND_SRC_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return R600_BLEND_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return R600_BLEND_DST_ALPHA; + case PIPE_BLENDFACTOR_DST_COLOR: + return R600_BLEND_DST_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return R600_BLEND_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return R600_BLEND_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return R600_BLEND_CONST_ALPHA; + case PIPE_BLENDFACTOR_ZERO: + return R600_BLEND_ZERO; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return R600_BLEND_ONE_MINUS_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return R600_BLEND_ONE_MINUS_SRC_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return R600_BLEND_ONE_MINUS_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return R600_BLEND_ONE_MINUS_DST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return R600_BLEND_ONE_MINUS_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return R600_BLEND_ONE_MINUS_CONST_ALPHA; + + case PIPE_BLENDFACTOR_SRC1_COLOR: + return R600_BLEND_SRC1_COLOR; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + return R600_BLEND_SRC1_ALPHA; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + return R600_BLEND_INV_SRC1_COLOR; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + return R600_BLEND_INV_SRC1_ALPHA; + default: + fprintf(stderr, "r600: Implementation error: " + "Bad blend factor %d not supported!\n", blend_fact); + assert(0); + break; + } + return 0; +} + +#endif -- cgit v1.2.3 From 3f4ec394b027c6d947ccc88309a7d37bc3859e9d Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 2 Aug 2010 14:46:17 +1000 Subject: r600g: initial alpha test state --- src/gallium/drivers/r600/r600_state.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 56304cc69f..9af39f7218 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -873,19 +873,27 @@ static struct radeon_state *r600_dsa(struct r600_context *rctx) const struct pipe_depth_stencil_alpha_state *state = &rctx->dsa->state.dsa; struct r600_screen *rscreen = rctx->screen; struct radeon_state *rstate; - unsigned db_depth_control; - + unsigned db_depth_control, alpha_test_control, alpha_ref; + rstate = radeon_state(rscreen->rw, R600_DSA_TYPE, R600_DSA); if (rstate == NULL) return NULL; + db_depth_control = 0x00700700 | S_028800_Z_ENABLE(state->depth.enabled) | S_028800_Z_WRITE_ENABLE(state->depth.writemask) | S_028800_ZFUNC(state->depth.func); - + alpha_test_control = 0; + alpha_ref = 0; + if (state->alpha.enabled) { + alpha_test_control = (state->alpha.func) << 0; + alpha_test_control |= SX_ALPHA_TEST_ENABLE; + alpha_ref = fui(state->alpha.ref_value); + } + rstate->states[R600_DSA__DB_STENCIL_CLEAR] = 0x00000000; rstate->states[R600_DSA__DB_DEPTH_CLEAR] = 0x3F800000; - rstate->states[R600_DSA__SX_ALPHA_TEST_CONTROL] = 0x00000000; + rstate->states[R600_DSA__SX_ALPHA_TEST_CONTROL] = alpha_test_control; rstate->states[R600_DSA__DB_STENCILREFMASK] = 0xFFFFFF00; rstate->states[R600_DSA__DB_STENCILREFMASK_BF] = 0xFFFFFF00; - rstate->states[R600_DSA__SX_ALPHA_REF] = 0x00000000; + rstate->states[R600_DSA__SX_ALPHA_REF] = alpha_ref; rstate->states[R600_DSA__SPI_FOG_FUNC_SCALE] = 0x00000000; rstate->states[R600_DSA__SPI_FOG_FUNC_BIAS] = 0x00000000; rstate->states[R600_DSA__SPI_FOG_CNTL] = 0x00000000; -- cgit v1.2.3 From 4af5f11c3232015006f61c1a6befdff3411b8d6c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 2 Aug 2010 14:48:59 +1000 Subject: r600g: add stencil op/func translation --- src/gallium/drivers/r600/r600_state.c | 36 ++++++++++++++++++++++++--- src/gallium/drivers/r600/r600_state_inlines.h | 33 ++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 9af39f7218..bbfe4da840 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -874,12 +874,42 @@ static struct radeon_state *r600_dsa(struct r600_context *rctx) struct r600_screen *rscreen = rctx->screen; struct radeon_state *rstate; unsigned db_depth_control, alpha_test_control, alpha_ref; + unsigned stencil_ref_mask, stencil_ref_mask_bf; rstate = radeon_state(rscreen->rw, R600_DSA_TYPE, R600_DSA); if (rstate == NULL) return NULL; - db_depth_control = 0x00700700 | S_028800_Z_ENABLE(state->depth.enabled) | S_028800_Z_WRITE_ENABLE(state->depth.writemask) | S_028800_ZFUNC(state->depth.func); + stencil_ref_mask = 0; + stencil_ref_mask_bf = 0; + db_depth_control = 0x00700700 | + S_028800_Z_ENABLE(state->depth.enabled) | + S_028800_Z_WRITE_ENABLE(state->depth.writemask) | + S_028800_ZFUNC(state->depth.func); + /* set stencil enable */ + db_depth_control |= S_028800_STENCIL_ENABLE(state->stencil[0].enabled); + + if (state->stencil[0].enabled) { + + db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)); + db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)); + db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)); + db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); + + db_depth_control |= S_028800_BACKFACE_ENABLE(state->stencil[1].enabled); + + stencil_ref_mask = (state->stencil[0].valuemask << R600_STENCILMASK_SHIFT) | + (state->stencil[0].writemask << R600_STENCILWRITEMASK_SHIFT); + if (state->stencil[1].enabled) { + db_depth_control |= S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func)); + db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)); + db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)); + db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); + stencil_ref_mask_bf = (state->stencil[1].valuemask << R600_STENCILMASK_SHIFT) | + (state->stencil[1].writemask << R600_STENCILWRITEMASK_SHIFT); + } + } + alpha_test_control = 0; alpha_ref = 0; if (state->alpha.enabled) { @@ -891,8 +921,8 @@ static struct radeon_state *r600_dsa(struct r600_context *rctx) rstate->states[R600_DSA__DB_STENCIL_CLEAR] = 0x00000000; rstate->states[R600_DSA__DB_DEPTH_CLEAR] = 0x3F800000; rstate->states[R600_DSA__SX_ALPHA_TEST_CONTROL] = alpha_test_control; - rstate->states[R600_DSA__DB_STENCILREFMASK] = 0xFFFFFF00; - rstate->states[R600_DSA__DB_STENCILREFMASK_BF] = 0xFFFFFF00; + rstate->states[R600_DSA__DB_STENCILREFMASK] = stencil_ref_mask; + rstate->states[R600_DSA__DB_STENCILREFMASK_BF] = stencil_ref_mask_bf; rstate->states[R600_DSA__SX_ALPHA_REF] = alpha_ref; rstate->states[R600_DSA__SPI_FOG_FUNC_SCALE] = 0x00000000; rstate->states[R600_DSA__SPI_FOG_FUNC_BIAS] = 0x00000000; diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 42bab52b3f..369263dc8f 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -97,4 +97,37 @@ static INLINE uint32_t r600_translate_blend_factor(int blend_fact) return 0; } +static INLINE uint32_t r600_translate_stencil_op(int s_op) +{ + switch (s_op) { + case PIPE_STENCIL_OP_KEEP: + return R600_ZS_KEEP; + case PIPE_STENCIL_OP_ZERO: + return R600_ZS_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return R600_ZS_REPLACE; + case PIPE_STENCIL_OP_INCR: + return R600_ZS_INCR; + case PIPE_STENCIL_OP_DECR: + return R600_ZS_DECR; + case PIPE_STENCIL_OP_INCR_WRAP: + return R600_ZS_INCR_WRAP; + case PIPE_STENCIL_OP_DECR_WRAP: + return R600_ZS_DECR_WRAP; + case PIPE_STENCIL_OP_INVERT: + return R600_ZS_INVERT; + default: + fprintf(stderr, "r600: Unknown stencil op %d", s_op); + assert(0); + break; + } + return 0; +} + +/* translates straight */ +static INLINE uint32_t r600_translate_ds_func(int func) +{ + return func; +} + #endif -- cgit v1.2.3 From afa925066c158ac49e3b0f883f67debd8545bf26 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 2 Aug 2010 17:14:07 +0200 Subject: r300g: fix hardlock when using more than one stuffed sprite coords If texture coordinates come from the vertex shader, there are always 4 components in the rasterizer input packet, but if the coordinates are stuffed (like for point sprites), there are only 2 or 3 components (based on GB_ENABLE) and if we rasterize more, it locks up. --- src/gallium/drivers/r300/r300_state.c | 2 +- src/gallium/drivers/r300/r300_state_derived.c | 38 ++++++++++++++------------- 2 files changed, 21 insertions(+), 19 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index fced77e6f1..bc0c86d8a4 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1014,7 +1014,7 @@ static void* r300_create_rs_state(struct pipe_context* pipe, for (i = 0; i < 8; i++) { if (state->sprite_coord_enable & (1 << i)) stuffing_enable |= - R300_GB_TEX_STR << (R300_GB_TEX0_SOURCE_SHIFT + (i*2)); + R300_GB_TEX_ST << (R300_GB_TEX0_SOURCE_SHIFT + (i*2)); } point_texcoord_left = 0.0f; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index a85db27064..66f64f0f6a 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -213,19 +213,19 @@ static void r300_rs_tex(struct r300_rs_block* rs, int id, int ptr, enum r300_rs_swizzle swiz) { if (swiz == SWIZ_X001) { - rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) | + rs->ip[id] |= R300_RS_TEX_PTR(ptr) | R300_RS_SEL_S(R300_RS_SEL_C0) | R300_RS_SEL_T(R300_RS_SEL_K0) | R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1); } else if (swiz == SWIZ_XY01) { - rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) | + rs->ip[id] |= R300_RS_TEX_PTR(ptr) | R300_RS_SEL_S(R300_RS_SEL_C0) | R300_RS_SEL_T(R300_RS_SEL_C1) | R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1); } else { - rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) | + rs->ip[id] |= R300_RS_TEX_PTR(ptr) | R300_RS_SEL_S(R300_RS_SEL_C0) | R300_RS_SEL_T(R300_RS_SEL_C1) | R300_RS_SEL_R(R300_RS_SEL_C2) | @@ -261,23 +261,21 @@ static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset) static void r500_rs_tex(struct r300_rs_block* rs, int id, int ptr, enum r300_rs_swizzle swiz) { - int rs_tex_comp = ptr*4; - if (swiz == SWIZ_X001) { - rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) | + rs->ip[id] |= R500_RS_SEL_S(ptr) | R500_RS_SEL_T(R500_RS_IP_PTR_K0) | R500_RS_SEL_R(R500_RS_IP_PTR_K0) | R500_RS_SEL_Q(R500_RS_IP_PTR_K1); } else if (swiz == SWIZ_XY01) { - rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) | - R500_RS_SEL_T(rs_tex_comp + 1) | + rs->ip[id] |= R500_RS_SEL_S(ptr) | + R500_RS_SEL_T(ptr + 1) | R500_RS_SEL_R(R500_RS_IP_PTR_K0) | R500_RS_SEL_Q(R500_RS_IP_PTR_K1); } else { - rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) | - R500_RS_SEL_T(rs_tex_comp + 1) | - R500_RS_SEL_R(rs_tex_comp + 2) | - R500_RS_SEL_Q(rs_tex_comp + 3); + rs->ip[id] |= R500_RS_SEL_S(ptr) | + R500_RS_SEL_T(ptr + 1) | + R500_RS_SEL_R(ptr + 2) | + R500_RS_SEL_Q(ptr + 3); } rs->inst[id] |= R500_RS_INST_TEX_ID(id); } @@ -305,7 +303,7 @@ static void r300_update_rs_block(struct r300_context *r300) struct r300_shader_semantics *vs_outputs = &vs->outputs; struct r300_shader_semantics *fs_inputs = &r300_fs(r300)->shader->inputs; struct r300_rs_block rs = {0}; - int i, col_count = 0, tex_count = 0, fp_offset = 0, count, loc = 0; + int i, col_count = 0, tex_count = 0, fp_offset = 0, count, loc = 0, tex_ptr = 0; void (*rX00_rs_col)(struct r300_rs_block*, int, int, enum r300_rs_swizzle); void (*rX00_rs_col_write)(struct r300_rs_block*, int, int); void (*rX00_rs_tex)(struct r300_rs_block*, int, int, enum r300_rs_swizzle); @@ -393,8 +391,9 @@ static void r300_update_rs_block(struct r300_context *r300) stream_loc_notcl[loc++] = 6 + tex_count; /* Rasterize it. */ - rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_XYZW); + rX00_rs_tex(&rs, tex_count, tex_ptr, SWIZ_XYZW); tex_count++; + tex_ptr += 4; } } } @@ -412,7 +411,7 @@ static void r300_update_rs_block(struct r300_context *r300) } /* Rasterize it. */ - rX00_rs_tex(&rs, tex_count, tex_count, + rX00_rs_tex(&rs, tex_count, tex_ptr, sprite_coord ? SWIZ_XY01 : SWIZ_XYZW); /* Write it to the FS input register if it's needed by the FS. */ @@ -429,6 +428,7 @@ static void r300_update_rs_block(struct r300_context *r300) i, sprite_coord ? " (sprite coord)" : ""); } tex_count++; + tex_ptr += sprite_coord ? 2 : 4; } else { /* Skip the FS input register, leave it uninitialized. */ /* If we try to set it to (0,0,0,1), it will lock up. */ @@ -449,7 +449,7 @@ static void r300_update_rs_block(struct r300_context *r300) stream_loc_notcl[loc++] = 6 + tex_count; /* Rasterize it. */ - rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_X001); + rX00_rs_tex(&rs, tex_count, tex_ptr, SWIZ_X001); /* Write it to the FS input register if it's needed by the FS. */ if (fs_inputs->fog != ATTR_UNUSED) { @@ -461,6 +461,7 @@ static void r300_update_rs_block(struct r300_context *r300) DBG(r300, DBG_RS, "r300: Rasterized fog unused.\n"); } tex_count++; + tex_ptr += 4; } else { /* Skip the FS input register, leave it uninitialized. */ /* If we try to set it to (0,0,0,1), it will lock up. */ @@ -480,7 +481,7 @@ static void r300_update_rs_block(struct r300_context *r300) stream_loc_notcl[loc++] = 6 + tex_count; /* Rasterize it. */ - rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_XYZW); + rX00_rs_tex(&rs, tex_count, tex_ptr, SWIZ_XYZW); /* Write it to the FS input register. */ rX00_rs_tex_write(&rs, tex_count, fp_offset); @@ -489,6 +490,7 @@ static void r300_update_rs_block(struct r300_context *r300) fp_offset++; tex_count++; + tex_ptr += 4; } /* Invalidate the rest of the no-TCL (GA) stream locations. */ @@ -507,7 +509,7 @@ static void r300_update_rs_block(struct r300_context *r300) DBG(r300, DBG_RS, "r300: --- Rasterizer status ---: colors: %i, " "generics: %i.\n", col_count, tex_count); - rs.count = (tex_count*4) | (col_count << R300_IC_COUNT_SHIFT) | + rs.count = MIN2(tex_ptr, 32) | (col_count << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; count = MAX3(col_count, tex_count, 1); -- cgit v1.2.3 From 313df4156279f84ebc5b98a7540820b994762650 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Mon, 2 Aug 2010 14:21:33 -0400 Subject: r600g: add autogenerated reg definition + debug print cleanup Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_reg.h | 72 ------------ src/gallium/drivers/r600/r600_screen.c | 6 +- src/gallium/drivers/r600/r600_shader.c | 2 +- src/gallium/drivers/r600/r600_state.c | 38 +++---- src/gallium/drivers/r600/r600_state_inlines.h | 152 +++++++++++++------------- src/gallium/drivers/r600/r600d.h | 97 ++++++++++++++++ 6 files changed, 194 insertions(+), 173 deletions(-) delete mode 100644 src/gallium/drivers/r600/r600_reg.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_reg.h b/src/gallium/drivers/r600/r600_reg.h deleted file mode 100644 index 2600875d5c..0000000000 --- a/src/gallium/drivers/r600/r600_reg.h +++ /dev/null @@ -1,72 +0,0 @@ -#ifndef R600_REG_H -#define R600_REG_H - -/* for regs which haven't been generated yet */ - -#define R600_BLEND_ZERO 0 -#define R600_BLEND_ONE 1 -#define R600_BLEND_SRC_COLOR 2 -#define R600_BLEND_ONE_MINUS_SRC_COLOR 3 -#define R600_BLEND_SRC_ALPHA 4 -#define R600_BLEND_ONE_MINUS_SRC_ALPHA 5 -#define R600_BLEND_DST_ALPHA 6 -#define R600_BLEND_ONE_MINUS_DST_ALPHA 7 -#define R600_BLEND_DST_COLOR 8 -#define R600_BLEND_ONE_MINUS_DST_COLOR 9 -#define R600_BLEND_SRC_ALPHA_SATURATE 10 -#define R600_BLEND_BOTH_SRC_ALPHA 11 -#define R600_BLEND_BOTH_INV_SRC_ALPHA 12 -#define R600_BLEND_CONST_COLOR 13 -#define R600_BLEND_ONE_MINUS_CONST_COLOR 14 -#define R600_BLEND_SRC1_COLOR 15 -#define R600_BLEND_INV_SRC1_COLOR 16 -#define R600_BLEND_SRC1_ALPHA 17 -#define R600_BLEND_INV_SRC1_ALPHA 18 -#define R600_BLEND_CONST_ALPHA 19 -#define R600_BLEND_ONE_MINUS_CONST_ALPHA 20 - -#define R600_BLEND_FCN_ADD 0 -#define R600_BLEND_FCN_SUBTRACT 1 -#define R600_BLEND_FCN_MIN 2 -#define R600_BLEND_FCN_MAX 3 -#define R600_BLEND_FCN_RSUB 4 - -#define CB_BLEND_COLOR_SRCBLEND_SHIFT 0 -#define CB_BLEND_COLOR_COMB_FCN_SHIFT 5 -#define CB_BLEND_COLOR_DESTBLEND_SHIFT 8 -#define CB_BLEND_ALPHA_SRCBLEND_SHIFT 16 -#define CB_BLEND_ALPHA_COMB_FCN_SHIFT 21 -#define CB_BLEND_ALPHA_DESTBLEND_SHIFT 24 -#define CB_BLEND_SEPARATE_ALPHA_BLEND (1 << 29) - -#define SX_ALPHA_TEST_FUNC_SHIFT (0) -#define SX_ALPHA_TEST_ENABLE (1 << 3) - -#define R600_ZS_KEEP 0 -#define R600_ZS_ZERO 1 -#define R600_ZS_REPLACE 2 -#define R600_ZS_INCR 3 -#define R600_ZS_DECR 4 -#define R600_ZS_INVERT 5 -#define R600_ZS_INCR_WRAP 6 -#define R600_ZS_DECR_WRAP 7 - -#define R600_STENCILREF_SHIFT 0 -#define R600_STENCILMASK_SHIFT 8 -#define R600_STENCILWRITEMASK_SHIFT 16 - -#define PA_SU_PS_WIDTH_SHIFT 16 - -#define PA_SU_CULL_FRONT (1 << 0) -#define PA_SU_CULL_BACK (1 << 1) -#define PA_SU_FACE_CCW (0 << 2) -#define PA_SU_FACE_CW (1 << 2) - -#define PA_SU_POLYMODE_FRONT_SHIFT 5 -#define PA_SU_POLYMODE_BACK_SHIFT 5 -#define POLYGON_MODE_POINT 0 -#define POLYGON_MODE_LINE 1 -#define POLYGON_MODE_TRI 2 - - -#endif diff --git a/src/gallium/drivers/r600/r600_screen.c b/src/gallium/drivers/r600/r600_screen.c index e0d74ca558..68615ca162 100644 --- a/src/gallium/drivers/r600/r600_screen.c +++ b/src/gallium/drivers/r600/r600_screen.c @@ -106,7 +106,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: return 0; default: - debug_printf("r600: unknown param %d\n", param); + R600_ERR("r600: unknown param %d\n", param); return 0; } } @@ -124,7 +124,7 @@ static float r600_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: return 16.0f; default: - debug_printf("r600: unsupported paramf %d\n", param); + R600_ERR("r600: unsupported paramf %d\n", param); return 0.0f; } } @@ -137,7 +137,7 @@ static boolean r600_is_format_supported(struct pipe_screen* screen, unsigned geom_flags) { if (target >= PIPE_MAX_TEXTURE_TYPES) { - debug_printf("r600: unsupported texture type %d\n", target); + R600_ERR("r600: unsupported texture type %d\n", target); return FALSE; } switch (format) { diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 0582839905..34c6a444a3 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -129,7 +129,7 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_shader *rshader = &rpshader->shader; struct radeon_state *state; - unsigned i, j, tmp; + unsigned i, tmp; rpshader->rstate = radeon_state_decref(rpshader->rstate); state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER); diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index bbfe4da840..60270e18a6 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -32,7 +32,6 @@ #include "r600_context.h" #include "r600_resource.h" #include "r600d.h" -#include "r600_reg.h" #include "r600_state_inlines.h" static void *r600_create_blend_state(struct pipe_context *ctx, @@ -292,7 +291,7 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, type = R600_PS_CONSTANT_TYPE; break; default: - fprintf(stderr, "%s:%d unsupported %d\n", __func__, __LINE__, shader); + R600_ERR("unsupported %d\n", shader); return; } if (buffer && buffer->width0 > 0) { @@ -418,8 +417,7 @@ static void r600_set_index_buffer(struct pipe_context *ctx, if (ib) { pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer); memcpy(&rctx->index_buffer, ib, sizeof(rctx->index_buffer)); - } - else { + } else { pipe_resource_reference(&rctx->index_buffer.buffer, NULL); memset(&rctx->index_buffer, 0, sizeof(rctx->index_buffer)); } @@ -643,15 +641,15 @@ static struct radeon_state *r600_blend(struct r600_context *rctx) if (!state->rt[i].blend_enable) continue; - bc |= r600_translate_blend_function(eqRGB) << CB_BLEND_COLOR_COMB_FCN_SHIFT; - bc |= r600_translate_blend_factor(srcRGB) << CB_BLEND_COLOR_SRCBLEND_SHIFT; - bc |= r600_translate_blend_factor(dstRGB) << CB_BLEND_COLOR_DESTBLEND_SHIFT; + bc |= S_028804_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB)); + bc |= S_028804_COLOR_SRCBLEND(r600_translate_blend_factor(srcRGB)); + bc |= S_028804_COLOR_DESTBLEND(r600_translate_blend_factor(dstRGB)); if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { - bc |= CB_BLEND_SEPARATE_ALPHA_BLEND; - bc |= r600_translate_blend_function(eqA) << CB_BLEND_ALPHA_COMB_FCN_SHIFT; - bc |= r600_translate_blend_factor(srcA) << CB_BLEND_ALPHA_SRCBLEND_SHIFT; - bc |= r600_translate_blend_factor(dstA) << CB_BLEND_ALPHA_DESTBLEND_SHIFT; + bc |= S_028804_SEPARATE_ALPHA_BLEND(1); + bc |= S_028804_ALPHA_COMB_FCN(r600_translate_blend_function(eqA)); + bc |= S_028804_ALPHA_SRCBLEND(r600_translate_blend_factor(srcA)); + bc |= S_028804_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA)); } rstate->states[R600_BLEND__CB_BLEND0_CONTROL + i] = bc; @@ -895,26 +893,25 @@ static struct radeon_state *r600_dsa(struct r600_context *rctx) db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)); db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)); db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); - db_depth_control |= S_028800_BACKFACE_ENABLE(state->stencil[1].enabled); - stencil_ref_mask = (state->stencil[0].valuemask << R600_STENCILMASK_SHIFT) | - (state->stencil[0].writemask << R600_STENCILWRITEMASK_SHIFT); + stencil_ref_mask = S_028430_STENCILMASK(state->stencil[0].valuemask) | + S_028430_STENCILWRITEMASK(state->stencil[0].writemask); if (state->stencil[1].enabled) { db_depth_control |= S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func)); db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)); db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)); db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); - stencil_ref_mask_bf = (state->stencil[1].valuemask << R600_STENCILMASK_SHIFT) | - (state->stencil[1].writemask << R600_STENCILWRITEMASK_SHIFT); + stencil_ref_mask_bf = S_028434_STENCILMASK_BF(state->stencil[1].valuemask) | + S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask); } } alpha_test_control = 0; alpha_ref = 0; if (state->alpha.enabled) { - alpha_test_control = (state->alpha.func) << 0; - alpha_test_control |= SX_ALPHA_TEST_ENABLE; + alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func); + alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1); alpha_ref = fui(state->alpha.ref_value); } @@ -1108,7 +1105,10 @@ static struct radeon_state *r600_resource(struct r600_context *rctx, if (r600_conv_pipe_format(view->texture->format, &format)) return NULL; desc = util_format_description(view->texture->format); - assert(desc == NULL); + if (desc == NULL) { + R600_ERR("unknow format %d\n", view->texture->format); + return NULL; + } rstate = radeon_state(rscreen->rw, R600_PS_RESOURCE_TYPE, id); if (rstate == NULL) { return NULL; diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 369263dc8f..26a5dd0432 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -18,79 +18,75 @@ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ #ifndef R600_STATE_INLINES_H #define R600_STATE_INLINES_H -#include "r600_reg.h" - static INLINE uint32_t r600_translate_blend_function(int blend_func) { switch (blend_func) { - case PIPE_BLEND_ADD: - return R600_BLEND_FCN_ADD; - case PIPE_BLEND_SUBTRACT: - return R600_BLEND_FCN_SUBTRACT; - case PIPE_BLEND_REVERSE_SUBTRACT: - return R600_BLEND_FCN_RSUB; - case PIPE_BLEND_MIN: - return R600_BLEND_FCN_MIN; - case PIPE_BLEND_MAX: - return R600_BLEND_FCN_MAX; - default: - fprintf(stderr, "r600: Unknown blend function %d\n", blend_func); - assert(0); - break; - } - return 0; + case PIPE_BLEND_ADD: + return V_028804_COMB_DST_PLUS_SRC; + case PIPE_BLEND_SUBTRACT: + return V_028804_COMB_SRC_MINUS_DST; + case PIPE_BLEND_REVERSE_SUBTRACT: + return V_028804_COMB_DST_MINUS_SRC; + case PIPE_BLEND_MIN: + return V_028804_COMB_MIN_DST_SRC; + case PIPE_BLEND_MAX: + return V_028804_COMB_MAX_DST_SRC; + default: + R600_ERR("Unknown blend function %d\n", blend_func); + assert(0); + break; + } + return 0; } static INLINE uint32_t r600_translate_blend_factor(int blend_fact) { switch (blend_fact) { - case PIPE_BLENDFACTOR_ONE: - return R600_BLEND_ZERO; - case PIPE_BLENDFACTOR_SRC_COLOR: - return R600_BLEND_SRC_COLOR; - case PIPE_BLENDFACTOR_SRC_ALPHA: - return R600_BLEND_SRC_ALPHA; - case PIPE_BLENDFACTOR_DST_ALPHA: - return R600_BLEND_DST_ALPHA; - case PIPE_BLENDFACTOR_DST_COLOR: - return R600_BLEND_DST_COLOR; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - return R600_BLEND_SRC_ALPHA_SATURATE; - case PIPE_BLENDFACTOR_CONST_COLOR: - return R600_BLEND_CONST_COLOR; - case PIPE_BLENDFACTOR_CONST_ALPHA: - return R600_BLEND_CONST_ALPHA; - case PIPE_BLENDFACTOR_ZERO: - return R600_BLEND_ZERO; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - return R600_BLEND_ONE_MINUS_SRC_COLOR; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - return R600_BLEND_ONE_MINUS_SRC_ALPHA; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - return R600_BLEND_ONE_MINUS_DST_ALPHA; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - return R600_BLEND_ONE_MINUS_DST_COLOR; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - return R600_BLEND_ONE_MINUS_CONST_COLOR; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - return R600_BLEND_ONE_MINUS_CONST_ALPHA; - - case PIPE_BLENDFACTOR_SRC1_COLOR: - return R600_BLEND_SRC1_COLOR; - case PIPE_BLENDFACTOR_SRC1_ALPHA: - return R600_BLEND_SRC1_ALPHA; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - return R600_BLEND_INV_SRC1_COLOR; - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - return R600_BLEND_INV_SRC1_ALPHA; + case PIPE_BLENDFACTOR_ONE: + return V_028804_BLEND_ZERO; + case PIPE_BLENDFACTOR_SRC_COLOR: + return V_028804_BLEND_SRC_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return V_028804_BLEND_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return V_028804_BLEND_DST_ALPHA; + case PIPE_BLENDFACTOR_DST_COLOR: + return V_028804_BLEND_DST_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return V_028804_BLEND_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return V_028804_BLEND_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return V_028804_BLEND_CONST_ALPHA; + case PIPE_BLENDFACTOR_ZERO: + return V_028804_BLEND_ZERO; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return V_028804_BLEND_ONE_MINUS_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return V_028804_BLEND_ONE_MINUS_SRC_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return V_028804_BLEND_ONE_MINUS_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return V_028804_BLEND_ONE_MINUS_DST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return V_028804_BLEND_ONE_MINUS_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return V_028804_BLEND_ONE_MINUS_CONST_ALPHA; + case PIPE_BLENDFACTOR_SRC1_COLOR: + return V_028804_BLEND_SRC1_COLOR; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + return V_028804_BLEND_SRC1_ALPHA; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + return V_028804_BLEND_INV_SRC1_COLOR; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + return V_028804_BLEND_INV_SRC1_ALPHA; default: - fprintf(stderr, "r600: Implementation error: " - "Bad blend factor %d not supported!\n", blend_fact); + R600_ERR("Bad blend factor %d not supported!\n", blend_fact); assert(0); break; } @@ -101,23 +97,23 @@ static INLINE uint32_t r600_translate_stencil_op(int s_op) { switch (s_op) { case PIPE_STENCIL_OP_KEEP: - return R600_ZS_KEEP; - case PIPE_STENCIL_OP_ZERO: - return R600_ZS_ZERO; - case PIPE_STENCIL_OP_REPLACE: - return R600_ZS_REPLACE; - case PIPE_STENCIL_OP_INCR: - return R600_ZS_INCR; - case PIPE_STENCIL_OP_DECR: - return R600_ZS_DECR; - case PIPE_STENCIL_OP_INCR_WRAP: - return R600_ZS_INCR_WRAP; - case PIPE_STENCIL_OP_DECR_WRAP: - return R600_ZS_DECR_WRAP; - case PIPE_STENCIL_OP_INVERT: - return R600_ZS_INVERT; - default: - fprintf(stderr, "r600: Unknown stencil op %d", s_op); + return V_028800_STENCIL_KEEP; + case PIPE_STENCIL_OP_ZERO: + return V_028800_STENCIL_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return V_028800_STENCIL_REPLACE; + case PIPE_STENCIL_OP_INCR: + return V_028800_STENCIL_INCR; + case PIPE_STENCIL_OP_DECR: + return V_028800_STENCIL_DECR; + case PIPE_STENCIL_OP_INCR_WRAP: + return V_028800_STENCIL_INVERT; + case PIPE_STENCIL_OP_DECR_WRAP: + return V_028800_STENCIL_DECR_WRAP; + case PIPE_STENCIL_OP_INVERT: + return V_028800_STENCIL_INVERT; + default: + R600_ERR("Unknown stencil op %d", s_op); assert(0); break; } diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index c1acfcd29e..6085596ea8 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -249,6 +249,16 @@ #define S_028060_SLICE_TILE_MAX(x) (((x) & 0xFFFFF) << 10) #define G_028060_SLICE_TILE_MAX(x) (((x) >> 10) & 0xFFFFF) #define C_028060_SLICE_TILE_MAX 0xC00003FF +#define R_028410_SX_ALPHA_TEST_CONTROL 0x028410 +#define S_028410_ALPHA_FUNC(x) (((x) & 0x7) << 0) +#define G_028410_ALPHA_FUNC(x) (((x) >> 0) & 0x7) +#define C_028410_ALPHA_FUNC 0xFFFFFFF8 +#define S_028410_ALPHA_TEST_ENABLE(x) (((x) & 0x1) << 3) +#define G_028410_ALPHA_TEST_ENABLE(x) (((x) >> 3) & 0x1) +#define C_028410_ALPHA_TEST_ENABLE 0xFFFFFFF7 +#define S_028410_ALPHA_TEST_BYPASS(x) (((x) & 0x1) << 8) +#define G_028410_ALPHA_TEST_BYPASS(x) (((x) >> 8) & 0x1) +#define C_028410_ALPHA_TEST_BYPASS 0xFFFFFEFF #define R_028800_DB_DEPTH_CONTROL 0x028800 #define S_028800_STENCIL_ENABLE(x) (((x) & 0x1) << 0) #define G_028800_STENCIL_ENABLE(x) (((x) >> 0) & 0x1) @@ -268,9 +278,25 @@ #define S_028800_STENCILFUNC(x) (((x) & 0x7) << 8) #define G_028800_STENCILFUNC(x) (((x) >> 8) & 0x7) #define C_028800_STENCILFUNC 0xFFFFF8FF +#define V_028800_STENCILFUNC_NEVER 0x00000000 +#define V_028800_STENCILFUNC_LESS 0x00000001 +#define V_028800_STENCILFUNC_EQUAL 0x00000002 +#define V_028800_STENCILFUNC_LEQUAL 0x00000003 +#define V_028800_STENCILFUNC_GREATER 0x00000004 +#define V_028800_STENCILFUNC_NOTEQUAL 0x00000005 +#define V_028800_STENCILFUNC_GEQUAL 0x00000006 +#define V_028800_STENCILFUNC_ALWAYS 0x00000007 #define S_028800_STENCILFAIL(x) (((x) & 0x7) << 11) #define G_028800_STENCILFAIL(x) (((x) >> 11) & 0x7) #define C_028800_STENCILFAIL 0xFFFFC7FF +#define V_028800_STENCIL_KEEP 0x00000000 +#define V_028800_STENCIL_ZERO 0x00000001 +#define V_028800_STENCIL_REPLACE 0x00000002 +#define V_028800_STENCIL_INCR 0x00000003 +#define V_028800_STENCIL_DECR 0x00000004 +#define V_028800_STENCIL_INVERT 0x00000005 +#define V_028800_STENCIL_INCR_WRAP 0x00000006 +#define V_028800_STENCIL_DECR_WRAP 0x00000007 #define S_028800_STENCILZPASS(x) (((x) & 0x7) << 14) #define G_028800_STENCILZPASS(x) (((x) >> 14) & 0x7) #define C_028800_STENCILZPASS 0xFFFE3FFF @@ -316,6 +342,77 @@ #define S_028010_ZRANGE_PRECISION(x) (((x) & 0x1) << 31) #define G_028010_ZRANGE_PRECISION(x) (((x) >> 31) & 0x1) #define C_028010_ZRANGE_PRECISION 0x7FFFFFFF +#define R_028430_DB_STENCILREFMASK 0x028430 +#define S_028430_STENCILREF(x) (((x) & 0xFF) << 0) +#define G_028430_STENCILREF(x) (((x) >> 0) & 0xFF) +#define C_028430_STENCILREF 0xFFFFFF00 +#define S_028430_STENCILMASK(x) (((x) & 0xFF) << 8) +#define G_028430_STENCILMASK(x) (((x) >> 8) & 0xFF) +#define C_028430_STENCILMASK 0xFFFF00FF +#define S_028430_STENCILWRITEMASK(x) (((x) & 0xFF) << 16) +#define G_028430_STENCILWRITEMASK(x) (((x) >> 16) & 0xFF) +#define C_028430_STENCILWRITEMASK 0xFF00FFFF +#define R_028434_DB_STENCILREFMASK_BF 0x028434 +#define S_028434_STENCILREF_BF(x) (((x) & 0xFF) << 0) +#define G_028434_STENCILREF_BF(x) (((x) >> 0) & 0xFF) +#define C_028434_STENCILREF_BF 0xFFFFFF00 +#define S_028434_STENCILMASK_BF(x) (((x) & 0xFF) << 8) +#define G_028434_STENCILMASK_BF(x) (((x) >> 8) & 0xFF) +#define C_028434_STENCILMASK_BF 0xFFFF00FF +#define S_028434_STENCILWRITEMASK_BF(x) (((x) & 0xFF) << 16) +#define G_028434_STENCILWRITEMASK_BF(x) (((x) >> 16) & 0xFF) +#define C_028434_STENCILWRITEMASK_BF 0xFF00FFFF +#define R_028804_CB_BLEND_CONTROL 0x028804 +#define S_028804_COLOR_SRCBLEND(x) (((x) & 0x1F) << 0) +#define G_028804_COLOR_SRCBLEND(x) (((x) >> 0) & 0x1F) +#define C_028804_COLOR_SRCBLEND 0xFFFFFFE0 +#define V_028804_BLEND_ZERO 0x00000000 +#define V_028804_BLEND_ONE 0x00000001 +#define V_028804_BLEND_SRC_COLOR 0x00000002 +#define V_028804_BLEND_ONE_MINUS_SRC_COLOR 0x00000003 +#define V_028804_BLEND_SRC_ALPHA 0x00000004 +#define V_028804_BLEND_ONE_MINUS_SRC_ALPHA 0x00000005 +#define V_028804_BLEND_DST_ALPHA 0x00000006 +#define V_028804_BLEND_ONE_MINUS_DST_ALPHA 0x00000007 +#define V_028804_BLEND_DST_COLOR 0x00000008 +#define V_028804_BLEND_ONE_MINUS_DST_COLOR 0x00000009 +#define V_028804_BLEND_SRC_ALPHA_SATURATE 0x0000000A +#define V_028804_BLEND_BOTH_SRC_ALPHA 0x0000000B +#define V_028804_BLEND_BOTH_INV_SRC_ALPHA 0x0000000C +#define V_028804_BLEND_CONST_COLOR 0x0000000D +#define V_028804_BLEND_ONE_MINUS_CONST_COLOR 0x0000000E +#define V_028804_BLEND_SRC1_COLOR 0x0000000F +#define V_028804_BLEND_INV_SRC1_COLOR 0x00000010 +#define V_028804_BLEND_SRC1_ALPHA 0x00000011 +#define V_028804_BLEND_INV_SRC1_ALPHA 0x00000012 +#define V_028804_BLEND_CONST_ALPHA 0x00000013 +#define V_028804_BLEND_ONE_MINUS_CONST_ALPHA 0x00000014 +#define S_028804_COLOR_COMB_FCN(x) (((x) & 0x7) << 5) +#define G_028804_COLOR_COMB_FCN(x) (((x) >> 5) & 0x7) +#define C_028804_COLOR_COMB_FCN 0xFFFFFF1F +#define V_028804_COMB_DST_PLUS_SRC 0x00000000 +#define V_028804_COMB_SRC_MINUS_DST 0x00000001 +#define V_028804_COMB_MIN_DST_SRC 0x00000002 +#define V_028804_COMB_MAX_DST_SRC 0x00000003 +#define V_028804_COMB_DST_MINUS_SRC 0x00000004 +#define S_028804_COLOR_DESTBLEND(x) (((x) & 0x1F) << 8) +#define G_028804_COLOR_DESTBLEND(x) (((x) >> 8) & 0x1F) +#define C_028804_COLOR_DESTBLEND 0xFFFFE0FF +#define S_028804_OPACITY_WEIGHT(x) (((x) & 0x1) << 13) +#define G_028804_OPACITY_WEIGHT(x) (((x) >> 13) & 0x1) +#define C_028804_OPACITY_WEIGHT 0xFFFFDFFF +#define S_028804_ALPHA_SRCBLEND(x) (((x) & 0x1F) << 16) +#define G_028804_ALPHA_SRCBLEND(x) (((x) >> 16) & 0x1F) +#define C_028804_ALPHA_SRCBLEND 0xFFE0FFFF +#define S_028804_ALPHA_COMB_FCN(x) (((x) & 0x7) << 21) +#define G_028804_ALPHA_COMB_FCN(x) (((x) >> 21) & 0x7) +#define C_028804_ALPHA_COMB_FCN 0xFF1FFFFF +#define S_028804_ALPHA_DESTBLEND(x) (((x) & 0x1F) << 24) +#define G_028804_ALPHA_DESTBLEND(x) (((x) >> 24) & 0x1F) +#define C_028804_ALPHA_DESTBLEND 0xE0FFFFFF +#define S_028804_SEPARATE_ALPHA_BLEND(x) (((x) & 0x1) << 29) +#define G_028804_SEPARATE_ALPHA_BLEND(x) (((x) >> 29) & 0x1) +#define C_028804_SEPARATE_ALPHA_BLEND 0xDFFFFFFF #define R_028814_PA_SU_SC_MODE_CNTL 0x028814 #define S_028814_CULL_FRONT(x) (((x) & 0x1) << 0) #define G_028814_CULL_FRONT(x) (((x) >> 0) & 0x1) -- cgit v1.2.3 From f031817450fe75d3224f767d79938813287ac445 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Mon, 2 Aug 2010 17:41:52 -0400 Subject: r600g: split alu block to conform to limit + RCP opcode Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_asm.c | 4 +++- src/gallium/drivers/r600/r600_shader.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index e560f65dcd..386adde6b8 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -118,7 +118,9 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) nalu->nliteral = 0; /* cf can contains only alu or only vtx or only tex */ - if (bc->cf_last == NULL || bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) { + if (bc->cf_last == NULL || bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) || + (bc->cf_last->ndw >> 1) >= 120) { + /* at most 128 slots, one add alu can add 4 slots + 4 constant worst case */ r = r600_bc_add_cf(bc); if (r) { free(nalu); diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 34c6a444a3..5bb16bbd3e 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1044,7 +1044,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, - {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans}, {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans}, {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, -- cgit v1.2.3 From 57438adf3217955f16491ef8deeffafe05c2f7f8 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 3 Aug 2010 14:15:24 +0200 Subject: r300g: handle polygon offset correctly https://bugs.freedesktop.org/show_bug.cgi?id=29372 --- src/gallium/drivers/r300/r300_state.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index bc0c86d8a4..a3383c3878 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -744,7 +744,7 @@ static void r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE); /* Polygon offset depends on the zbuffer bit depth. */ - if (state->zsbuf && r300->polygon_offset_enabled) { + if (state->zsbuf) { switch (util_format_get_blocksize(state->zsbuf->texture->format)) { case 2: zbuffer_bpp = 16; @@ -756,7 +756,9 @@ static void if (r300->zbuffer_bpp != zbuffer_bpp) { r300->zbuffer_bpp = zbuffer_bpp; - r300->rs_state.dirty = TRUE; + + if (r300->polygon_offset_enabled) + r300->rs_state.dirty = TRUE; } } @@ -1095,9 +1097,7 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) } if (rs) { - r300->polygon_offset_enabled = (rs->rs.offset_point || - rs->rs.offset_line || - rs->rs.offset_tri); + r300->polygon_offset_enabled = rs->polygon_offset_enable; r300->sprite_coord_enable = rs->rs.sprite_coord_enable; r300->two_sided_color = rs->rs.light_twoside; } else { -- cgit v1.2.3 From 7e42b7e5d2aebcda0e6bf081b6661411731e6df2 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Tue, 3 Aug 2010 14:14:58 -0400 Subject: r600g: fix LIT + fix multiple constant one ALU + fix ALU block splitting Make sure LIT fills all slot for instruction (can't do W instruction without having the Z slot filled with at least a NOP). ALU instruction can't access more than 4 constant, move constant to temporary reg if we reach the limit. Fix ALU block splitting, only split ALU after ALU with last instruction bit sets. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/Makefile | 2 +- src/gallium/drivers/r600/r600_asm.c | 6 +- src/gallium/drivers/r600/r600_asm.h | 1 + src/gallium/drivers/r600/r600_context.h | 4 +- src/gallium/drivers/r600/r600_shader.c | 307 ++++++++++++++++++++------------ 5 files changed, 207 insertions(+), 113 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile index 8f1e1366b5..fc94ae71f4 100644 --- a/src/gallium/drivers/r600/Makefile +++ b/src/gallium/drivers/r600/Makefile @@ -9,6 +9,7 @@ LIBRARY_INCLUDES = \ C_SOURCES = \ r600_buffer.c \ r600_context.c \ + r600_shader.c \ r600_draw.c \ r600_blit.c \ r600_helper.c \ @@ -17,7 +18,6 @@ C_SOURCES = \ r600_screen.c \ r600_state.c \ r600_texture.c \ - r600_shader.c \ r600_asm.c \ r700_asm.c diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 386adde6b8..f1dc3dc3a9 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -91,6 +91,7 @@ static int r600_bc_add_cf(struct r600_bc *bc) bc->cf_last = cf; bc->ncf++; bc->ndw += 2; + bc->force_add_cf = 0; return 0; } @@ -119,7 +120,7 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) /* cf can contains only alu or only vtx or only tex */ if (bc->cf_last == NULL || bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) || - (bc->cf_last->ndw >> 1) >= 120) { + bc->force_add_cf) { /* at most 128 slots, one add alu can add 4 slots + 4 constant worst case */ r = r600_bc_add_cf(bc); if (r) { @@ -128,6 +129,9 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) } bc->cf_last->inst = V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3; } + if (alu->last && (bc->cf_last->ndw >> 1) >= 124) { + bc->force_add_cf = 1; + } /* number of gpr == the last gpr used in any alu */ for (i = 0; i < 3; i++) { if (alu->src[i].sel >= bc->ngpr && alu->src[i].sel < 128) { diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 88fb957440..3fd94dbda0 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -127,6 +127,7 @@ struct r600_bc { unsigned ncf; unsigned ngpr; unsigned nresource; + unsigned force_add_cf; u32 *bytecode; }; diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index a1ee9577ba..f8fdce50dc 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -200,10 +200,10 @@ void r600_init_state_functions(struct r600_context *rctx); void r600_init_query_functions(struct r600_context* rctx); struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv); -int r600_pipe_shader_create(struct pipe_context *ctx, +extern int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_context_state *rstate, const struct tgsi_token *tokens); -int r600_pipe_shader_update(struct pipe_context *ctx, +extern int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rstate); #define R600_ERR(fmt, args...) \ diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 5bb16bbd3e..7d304f5ae8 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -469,30 +469,14 @@ static int tgsi_end(struct r600_shader_ctx *ctx) static int tgsi_src(struct r600_shader_ctx *ctx, const struct tgsi_full_src_register *tgsi_src, - unsigned swizzle, struct r600_bc_alu_src *r600_src) { + memset(r600_src, 0, sizeof(struct r600_bc_alu_src)); r600_src->sel = tgsi_src->Register.Index; if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { r600_src->sel = 0; } r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; - switch (swizzle) { - case 0: - r600_src->chan = tgsi_src->Register.SwizzleX; - break; - case 1: - r600_src->chan = tgsi_src->Register.SwizzleY; - break; - case 2: - r600_src->chan = tgsi_src->Register.SwizzleZ; - break; - case 3: - r600_src->chan = tgsi_src->Register.SwizzleW; - break; - default: - return -EINVAL; - } return 0; } @@ -513,12 +497,70 @@ static int tgsi_dst(struct r600_shader_ctx *ctx, return 0; } +static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle) +{ + switch (swizzle) { + case 0: + return tgsi_src->Register.SwizzleX; + case 1: + return tgsi_src->Register.SwizzleY; + case 2: + return tgsi_src->Register.SwizzleZ; + case 3: + return tgsi_src->Register.SwizzleW; + default: + return 0; + } +} + +static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, k, nconst, r; + + for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { + if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { + nconst++; + } + r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]); + if (r) { + return r; + } + } + for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { + if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) { + for (k = 0; k < 4; k++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.src[0].sel = r600_src[0].sel; + alu.src[0].chan = k; + alu.dst.sel = ctx->temp_reg + j; + alu.dst.chan = k; + alu.dst.write = 1; + if (k == 3) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + r600_src[0].sel = ctx->temp_reg + j; + j--; + } + } + return 0; +} + static int tgsi_op2(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; + r = tgsi_split_constant(ctx, r600_src); + if (r) + return r; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { @@ -527,9 +569,8 @@ static int tgsi_op2(struct r600_shader_ctx *ctx) } else { alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]); - if (r) - return r; + alu.src[j] = r600_src[j]; + alu.src[j].chan = tgsi_chan(&inst->Src[j], i); } r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (r) @@ -567,9 +608,10 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) alu.inst = ctx->inst_info->r600_opcode; alu.dst.chan = i; alu.src[0].sel = 248; - r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]); + r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); if (r) return r; + alu.src[1].chan = tgsi_chan(&inst->Src[0], i); if (i == 3) { alu.last = 1; } @@ -583,9 +625,13 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) static int tgsi_slt(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, r; + r = tgsi_split_constant(ctx, r600_src); + if (r) + return r; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { @@ -593,12 +639,10 @@ static int tgsi_slt(struct r600_shader_ctx *ctx) alu.dst.chan = i; } else { alu.inst = ctx->inst_info->r600_opcode; - r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]); - if (r) - return r; - r = tgsi_src(ctx, &inst->Src[1], i, &alu.src[0]); - if (r) - return r; + alu.src[1] = r600_src[0]; + alu.src[1].chan = tgsi_chan(&inst->Src[0], i); + alu.src[0] = r600_src[1]; + alu.src[0].chan = tgsi_chan(&inst->Src[1], i); r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (r) return r; @@ -619,60 +663,56 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) struct r600_bc_alu alu; int r; - if (inst->Dst[0].Register.WriteMask & (1 << 0)) - { - /* dst.x, <- 1.0 */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; - alu.src[0].sel = 249; /*1.0*/ - alu.src[0].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); - if (r) - return r; - if ((inst->Dst[0].Register.WriteMask & 0xe) == 0) - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - } + /* dst.x, <- 1.0 */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.src[0].sel = 249; /*1.0*/ + alu.src[0].chan = 0; + r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); + if (r) + return r; + alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + /* dst.y = max(src.x, 0.0) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX; + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); + if (r) + return r; + alu.src[1].sel = 248; /*0.0*/ + alu.src[1].chan = tgsi_chan(&inst->Src[0], 0); + r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); + if (r) + return r; + alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; - if (inst->Dst[0].Register.WriteMask & (1 << 1)) - { - /* dst.y = max(src.x, 0.0) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX; - r = tgsi_src(ctx, &inst->Src[0], 0, &alu.src[0]); - if (r) - return r; - alu.src[1].sel = 248; /*0.0*/ - alu.src[1].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); - if (r) - return r; - if ((inst->Dst[0].Register.WriteMask & 0xa) == 0) - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - } + /* dst.z = NOP - fill Z slot */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; + alu.dst.chan = 2; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; - if (inst->Dst[0].Register.WriteMask & (1 << 3)) - { - /* dst.w, <- 1.0 */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; - alu.src[0].sel = 249; - alu.src[0].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); - if (r) - return r; - if ((inst->Dst[0].Register.WriteMask & 0x4) == 0) - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - } + /* dst.w, <- 1.0 */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.src[0].sel = 249; + alu.src[0].chan = 0; + r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); + if (r) + return r; + alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; if (inst->Dst[0].Register.WriteMask & (1 << 2)) { @@ -682,9 +722,10 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* dst.z = log(src.y) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED; - r = tgsi_src(ctx, &inst->Src[0], 1, &alu.src[0]); + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); if (r) return r; + alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); if (r) return r; @@ -699,14 +740,16 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT; - r = tgsi_src(ctx, &inst->Src[0], 3, &alu.src[0]); + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); if (r) - return r; + return r; + alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); alu.src[1].sel = sel; alu.src[1].chan = chan; - r = tgsi_src(ctx, &inst->Src[0], 0, &alu.src[2]); + r = tgsi_src(ctx, &inst->Src[0], &alu.src[2]); if (r) return r; + alu.src[2].chan = tgsi_chan(&inst->Src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; alu.dst.write = 1; @@ -743,9 +786,10 @@ static int tgsi_trans(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & (1 << i)) { alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]); + r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]); if (r) return r; + alu.src[j].chan = tgsi_chan(&inst->Src[j], i); } r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (r) @@ -759,6 +803,45 @@ static int tgsi_trans(struct r600_shader_ctx *ctx) return 0; } +static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, r; + + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = ctx->inst_info->r600_opcode; + for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { + r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]); + if (r) + return r; + alu.src[j].chan = tgsi_chan(&inst->Src[j], 0); + } + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + /* replicate result */ + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.src[0].sel = ctx->temp_reg; + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.dst.chan = i; + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; + if (i == 3) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) { struct r600_bc_alu alu; @@ -793,17 +876,20 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru static int tgsi_op3(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; + r = tgsi_split_constant(ctx, r600_src); + if (r) + return r; /* do it in 2 step as op3 doesn't support writemask */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]); - if (r) - return r; + alu.src[j] = r600_src[j]; + alu.src[j].chan = tgsi_chan(&inst->Src[j], i); } alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -822,16 +908,19 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) static int tgsi_dp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; + r = tgsi_split_constant(ctx, r600_src); + if (r) + return r; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]); - if (r) - return r; + alu.src[j] = r600_src[j]; + alu.src[j].chan = tgsi_chan(&inst->Src[j], i); } alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -878,7 +967,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE; alu.src[0].sel = src_gpr; - alu.src[0].chan = 3; + alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 3; alu.last = 1; @@ -892,7 +981,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 3; alu.src[1].sel = src_gpr; - alu.src[1].chan = 0; + alu.src[1].chan = tgsi_chan(&inst->Src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; alu.dst.write = 1; @@ -904,7 +993,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 3; alu.src[1].sel = src_gpr; - alu.src[1].chan = 1; + alu.src[1].chan = tgsi_chan(&inst->Src[0], 1); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 1; alu.dst.write = 1; @@ -916,7 +1005,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 3; alu.src[1].sel = src_gpr; - alu.src[1].chan = 2; + alu.src[1].chan = tgsi_chan(&inst->Src[0], 2); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 2; alu.dst.write = 1; @@ -955,7 +1044,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { tex.coord_type_x = 1; tex.coord_type_y = 1; - tex.coord_type_z = 1; + tex.coord_type_z = 1; tex.coord_type_w = 1; } return r600_bc_add_tex(ctx->bc, &tex); @@ -964,19 +1053,22 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) static int tgsi_lrp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; unsigned i; int r; + r = tgsi_split_constant(ctx, r600_src); + if (r) + return r; /* 1 - src0 */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD; alu.src[0].sel = 249; alu.src[0].chan = 0; - r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]); - if (r) - return r; + alu.src[1] = r600_src[0]; + alu.src[1].chan = tgsi_chan(&inst->Src[0], i); alu.src[1].neg = 1; alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -998,9 +1090,8 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = i; - r = tgsi_src(ctx, &inst->Src[2], i, &alu.src[1]); - if (r) - return r; + alu.src[1] = r600_src[2]; + alu.src[1].chan = tgsi_chan(&inst->Src[2], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) { @@ -1020,12 +1111,10 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD; alu.is_op3 = 1; - r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[0]); - if (r) - return r; - r = tgsi_src(ctx, &inst->Src[1], i, &alu.src[1]); - if (r) - return r; + alu.src[0] = r600_src[0]; + alu.src[0].chan = tgsi_chan(&inst->Src[0], i); + alu.src[1] = r600_src[1]; + alu.src[1].chan = tgsi_chan(&inst->Src[1], i); alu.src[2].sel = ctx->temp_reg; alu.src[2].chan = i; alu.dst.sel = ctx->temp_reg; @@ -1044,8 +1133,8 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, - {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans}, - {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans}, + {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, + {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, @@ -1071,7 +1160,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans}, + {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, -- cgit v1.2.3 From 8f6341d42131e6f60f269610d62b7f5b7b683052 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Tue, 3 Aug 2010 15:49:21 -0400 Subject: r600g: fix stencil Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_blit.c | 2 -- src/gallium/drivers/r600/r600_state.c | 13 +++++++------ 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index cc37227ead..f4eedfe4cb 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -86,7 +86,6 @@ static void r600_clear_render_target(struct pipe_context *pipe, util_blitter_clear_render_target(rctx->blitter, dst, rgba, dstx, dsty, width, height); -R600_ERR("vtx elem %p\n", rctx->vertex_elements); } static void r600_clear_depth_stencil(struct pipe_context *pipe, @@ -105,7 +104,6 @@ static void r600_clear_depth_stencil(struct pipe_context *pipe, util_blitter_clear_depth_stencil(rctx->blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height); -R600_ERR("vtx elem %p\n", rctx->vertex_elements); } static void r600_resource_copy_region(struct pipe_context *pipe, diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 60270e18a6..f687d31e4b 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -869,6 +869,7 @@ static struct radeon_state *r600_viewport(struct r600_context *rctx) static struct radeon_state *r600_dsa(struct r600_context *rctx) { const struct pipe_depth_stencil_alpha_state *state = &rctx->dsa->state.dsa; + const struct pipe_stencil_ref *stencil_ref = &rctx->stencil_ref->state.stencil_ref; struct r600_screen *rscreen = rctx->screen; struct radeon_state *rstate; unsigned db_depth_control, alpha_test_control, alpha_ref; @@ -880,30 +881,30 @@ static struct radeon_state *r600_dsa(struct r600_context *rctx) stencil_ref_mask = 0; stencil_ref_mask_bf = 0; - db_depth_control = 0x00700700 | - S_028800_Z_ENABLE(state->depth.enabled) | + db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | S_028800_Z_WRITE_ENABLE(state->depth.writemask) | S_028800_ZFUNC(state->depth.func); /* set stencil enable */ - db_depth_control |= S_028800_STENCIL_ENABLE(state->stencil[0].enabled); - - if (state->stencil[0].enabled) { + if (state->stencil[0].enabled) { + db_depth_control |= S_028800_STENCIL_ENABLE(1); db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)); db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)); db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)); db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); - db_depth_control |= S_028800_BACKFACE_ENABLE(state->stencil[1].enabled); stencil_ref_mask = S_028430_STENCILMASK(state->stencil[0].valuemask) | S_028430_STENCILWRITEMASK(state->stencil[0].writemask); + stencil_ref_mask |= S_028430_STENCILREF(stencil_ref->ref_value[0]); if (state->stencil[1].enabled) { + db_depth_control |= S_028800_BACKFACE_ENABLE(1); db_depth_control |= S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func)); db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)); db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)); db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); stencil_ref_mask_bf = S_028434_STENCILMASK_BF(state->stencil[1].valuemask) | S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask); + stencil_ref_mask_bf |= S_028430_STENCILREF(stencil_ref->ref_value[1]); } } -- cgit v1.2.3 From 3a8d4a89795d180b910b2c0dfa98c57cf9bb45a6 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Tue, 3 Aug 2010 16:15:17 -0400 Subject: r600g: fix color target mask Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_state.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index f687d31e4b..f0abafd6c2 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1182,8 +1182,7 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) if (pbs->rt[i].blend_enable) { color_control |= (1 << (8 + i)); target_mask |= (pbs->rt[0].colormask << (4 * i)); - } else if (i == 0) - target_mask |= 0xf; + } } rstate = radeon_state(rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = 0x0000000F; -- cgit v1.2.3 From a0b3944c30fdecf5ef97fd602b8a286bcc80fed3 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Tue, 3 Aug 2010 17:02:48 -0400 Subject: r600g: flush and resubmit if we reach limit Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_context.c | 2 +- src/gallium/drivers/r600/r600_context.h | 38 ++------------------------------- src/gallium/drivers/r600/r600_draw.c | 4 ++++ src/gallium/drivers/r600/r600_helper.c | 6 ++++++ 4 files changed, 13 insertions(+), 37 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index 8b191914f5..ae1780a1d4 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -41,7 +41,7 @@ static void r600_destroy_context(struct pipe_context *context) FREE(rctx); } -static void r600_flush(struct pipe_context *ctx, unsigned flags, +void r600_flush(struct pipe_context *ctx, unsigned flags, struct pipe_fence_handle **fence) { struct r600_context *rctx = r600_context(ctx); diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index f8fdce50dc..8d102b6850 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -113,23 +113,6 @@ struct r600_context { struct radeon_draw *draw; /* hw states */ struct r600_context_hw_states hw_states; -#if 0 - struct r600_pipe_shader *ps_shader; - struct r600_pipe_shader *vs_shader; - unsigned nps_sampler; - struct radeon_state *ps_sampler[PIPE_MAX_ATTRIBS]; - unsigned nps_view; - unsigned nvs_view; - struct r600_texture_resource *ps_view[PIPE_MAX_ATTRIBS]; - struct r600_texture_resource *vs_view[PIPE_MAX_ATTRIBS]; - unsigned flat_shade; - unsigned nvertex_buffer; - struct r600_vertex_elements_state *vertex_elements; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - struct pipe_stencil_ref stencil_ref; - struct pipe_framebuffer_state fb_state; - struct pipe_viewport_state viewport; -#endif /* pipe states */ unsigned flat_shade; unsigned ps_nsampler; @@ -161,25 +144,6 @@ struct r600_context { struct pipe_blend_color blend_color; }; -#if 0 -struct r600_vertex_elements_state -{ - unsigned count; - struct pipe_vertex_element elements[32]; -}; - -struct r600_pipe_shader { - struct r600_shader shader; - struct radeon_bo *bo; - struct radeon_state *state; -}; - -struct r600_texture_resource { - struct pipe_sampler_view view; - struct radeon_state *state; -}; -#endif - /* Convenience cast wrapper. */ static INLINE struct r600_context *r600_context(struct pipe_context *pipe) { @@ -189,6 +153,8 @@ static INLINE struct r600_context *r600_context(struct pipe_context *pipe) struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigned type, const void *state); struct r600_context_state *r600_context_state_incref(struct r600_context_state *rstate); struct r600_context_state *r600_context_state_decref(struct r600_context_state *rstate); +void r600_flush(struct pipe_context *ctx, unsigned flags, + struct pipe_fence_handle **fence); int r600_context_hw_states(struct r600_context *rctx); diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c index c52dfd3fb3..43c805b982 100644 --- a/src/gallium/drivers/r600/r600_draw.c +++ b/src/gallium/drivers/r600/r600_draw.c @@ -170,6 +170,10 @@ static int r600_draw_common(struct r600_draw *draw) return r; /* FIXME */ r = radeon_ctx_set_draw_new(rctx->ctx, rctx->draw); + if (r == -EBUSY) { + r600_flush(draw->ctx, 0, NULL); + r = radeon_ctx_set_draw_new(rctx->ctx, rctx->draw); + } if (r) return r; rctx->draw = radeon_draw_duplicate(rctx->draw); diff --git a/src/gallium/drivers/r600/r600_helper.c b/src/gallium/drivers/r600/r600_helper.c index 132abf90a3..c672fe7386 100644 --- a/src/gallium/drivers/r600/r600_helper.c +++ b/src/gallium/drivers/r600/r600_helper.c @@ -61,6 +61,12 @@ int r600_conv_pipe_format(unsigned pformat, unsigned *format) case PIPE_FORMAT_I8_UNORM: *format = V_0280A0_COLOR_8; return 0; + case PIPE_FORMAT_B4G4R4A4_UNORM: + *format = V_0280A0_COLOR_4_4_4_4; + return 0; + case PIPE_FORMAT_B5G6R5_UNORM: + *format = V_0280A0_COLOR_5_6_5; + return 0; case PIPE_FORMAT_L16_UNORM: case PIPE_FORMAT_Z16_UNORM: case PIPE_FORMAT_Z32_UNORM: -- cgit v1.2.3 From dd863bf5e7c6680075cf7c355a026b1da69ee9e3 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Tue, 3 Aug 2010 17:45:51 -0400 Subject: r600g: add polygon offset support Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_shader.c | 2 +- src/gallium/drivers/r600/r600_state.c | 44 +++++++++++++++++++++++++++++----- src/gallium/drivers/r600/r600d.h | 23 ++++++++++++++++++ 3 files changed, 62 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 7d304f5ae8..43b3e40fad 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1142,7 +1142,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_slt}, {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index f0abafd6c2..72c0ac5dd1 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -765,8 +765,37 @@ static struct radeon_state *r600_db(struct r600_context *rctx) static struct radeon_state *r600_rasterizer(struct r600_context *rctx) { const struct pipe_rasterizer_state *state = &rctx->rasterizer->state.rasterizer; + const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; struct r600_screen *rscreen = rctx->screen; struct radeon_state *rstate; + float offset_units = 0, offset_scale = 0; + char depth = 0; + unsigned offset_db_fmt_cntl = 0; + + if (fb->zsbuf) { + offset_units = state->offset_units; + offset_scale = state->offset_scale * 12.0f; + switch (fb->zsbuf->texture->format) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + depth = -24; + offset_units *= 2.0f; + break; + case PIPE_FORMAT_Z32_FLOAT: + depth = -23; + offset_units *= 1.0f; + offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1); + break; + case PIPE_FORMAT_Z16_UNORM: + depth = -16; + offset_units *= 4.0f; + break; + default: + R600_ERR("unsupported %d\n", fb->zsbuf->texture->format); + return NULL; + } + } + offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth); rctx->flat_shade = state->flatshade; rstate = radeon_state(rscreen->rw, R600_RASTERIZER_TYPE, R600_RASTERIZER); @@ -777,7 +806,10 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = 0x00080000 | S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | - S_028814_FACE(!state->front_ccw); + S_028814_FACE(!state->front_ccw) | + S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | + S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | + S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri); rstate->states[R600_RASTERIZER__PA_CL_VS_OUT_CNTL] = 0x00000000; rstate->states[R600_RASTERIZER__PA_CL_NANINF_CNTL] = 0x00000000; rstate->states[R600_RASTERIZER__PA_SU_POINT_SIZE] = 0x00080008; @@ -790,12 +822,12 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ] = 0x3F800000; rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ] = 0x3F800000; rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ] = 0x3F800000; - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_DB_FMT_CNTL] = 0x00000000; + rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_DB_FMT_CNTL] = offset_db_fmt_cntl; rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_CLAMP] = 0x00000000; - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_SCALE] = 0x00000000; - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET] = 0x00000000; - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE] = 0x00000000; - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET] = 0x00000000; + rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_SCALE] = fui(offset_scale); + rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET] = fui(offset_units); + rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE] = fui(offset_scale); + rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET] = fui(offset_units); if (radeon_state_pm4(rstate)) { radeon_state_decref(rstate); return NULL; diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 6085596ea8..24cb8593f7 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -539,6 +539,29 @@ #define S_028D10_IGNORE_SC_ZRANGE(x) (((x) & 0x1) << 17) #define G_028D10_IGNORE_SC_ZRANGE(x) (((x) >> 17) & 0x1) #define C_028D10_IGNORE_SC_ZRANGE 0xFFFDFFFF +#define R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL 0x028DF8 +#define S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(x) (((x) & 0xFF) << 0) +#define G_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(x) (((x) >> 0) & 0xFF) +#define C_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS 0xFFFFFF00 +#define S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(x) (((x) & 0x1) << 8) +#define G_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(x) (((x) >> 8) & 0x1) +#define C_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT 0xFFFFFEFF +#define R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE 0x028E00 +#define S_028E00_SCALE(x) (((x) & 0xFFFFFFFF) << 0) +#define G_028E00_SCALE(x) (((x) >> 0) & 0xFFFFFFFF) +#define C_028E00_SCALE 0x00000000 +#define R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET 0x028E04 +#define S_028E04_OFFSET(x) (((x) & 0xFFFFFFFF) << 0) +#define G_028E04_OFFSET(x) (((x) >> 0) & 0xFFFFFFFF) +#define C_028E04_OFFSET 0x00000000 +#define R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE 0x028E08 +#define S_028E08_SCALE(x) (((x) & 0xFFFFFFFF) << 0) +#define G_028E08_SCALE(x) (((x) >> 0) & 0xFFFFFFFF) +#define C_028E08_SCALE 0x00000000 +#define R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET 0x028E0C +#define S_028E0C_OFFSET(x) (((x) & 0xFFFFFFFF) << 0) +#define G_028E0C_OFFSET(x) (((x) >> 0) & 0xFFFFFFFF) +#define C_028E0C_OFFSET 0x00000000 #define R_028A40_VGT_GS_MODE 0x028A40 #define S_028A40_MODE(x) (((x) & 0x3) << 0) #define G_028A40_MODE(x) (((x) >> 0) & 0x3) -- cgit v1.2.3 From 8bc5fe1ad67127f642c47da0a307aa7bd8696fab Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 4 Aug 2010 09:36:17 +1000 Subject: r300g: disable multisample visuals until the state tracker bits catch up. This stops us advertising lots of ms visuals we can't actually use. Signed-off-by: Dave Airlie --- src/gallium/drivers/r300/r300_screen.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 676430f5fe..6268001054 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -279,11 +279,14 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, case 3: case 4: case 6: + return FALSE; +#if 0 if (usage != PIPE_BIND_RENDER_TARGET || !util_format_is_rgba8_variant( util_format_description(format))) { return FALSE; } +#endif break; default: return FALSE; -- cgit v1.2.3 From 2824d5687a19e42ba0da8fd08e80610c4469a3b3 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 3 Aug 2010 15:23:23 -0700 Subject: r300/compiler: r500 hw support for break and continue in loops. The BGNLOOP and ENDLOOP instructions are now being used correctly, which makes break and continue possible. The deadcode pass has been modified to handle breaks, and the compiler is more careful about which loops are unrolled. --- src/gallium/drivers/r300/r300_fs.c | 9 +- src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 8 +- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 9 +- src/mesa/drivers/dri/r300/compiler/r500_fragprog.c | 25 --- src/mesa/drivers/dri/r300/compiler/r500_fragprog.h | 2 - .../drivers/dri/r300/compiler/r500_fragprog_emit.c | 114 ++++++---- src/mesa/drivers/dri/r300/compiler/radeon_code.h | 3 + .../dri/r300/compiler/radeon_dataflow_deadcode.c | 39 +++- .../dri/r300/compiler/radeon_emulate_loops.c | 237 +++++++++++++-------- .../dri/r300/compiler/radeon_emulate_loops.h | 9 +- .../drivers/dri/r300/compiler/radeon_optimize.c | 3 +- 11 files changed, 289 insertions(+), 169 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index db5269912e..87ff49a90c 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -248,13 +248,18 @@ static void r300_emit_fs_code_to_buffer( shader->cb_code_size = 19 + ((code->inst_end + 1) * 6) + - imm_count * 7; + imm_count * 7 + + code->int_constant_count * 2; NEW_CB(shader->cb_code, shader->cb_code_size); OUT_CB_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); OUT_CB_REG(R500_US_PIXSIZE, code->max_temp_idx); OUT_CB_REG(R500_US_FC_CTRL, code->us_fc_ctrl); - OUT_CB_REG(R500_US_CODE_RANGE, + for(i = 0; i < code->int_constant_count; i++){ + OUT_CB_REG(R500_US_FC_INT_CONST_0 + (i * 4), + code->int_constants[i]); + } + OUT_CB_REG(R500_US_CODE_RANGE, R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end)); OUT_CB_REG(R500_US_CODE_OFFSET, 0); OUT_CB_REG(R500_US_CODE_ADDR, diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index a326ee4c4f..070939497c 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -109,13 +109,13 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) debug_program_log(c, "before compilation"); if (c->Base.is_r500){ - r500_transform_unroll_loops(&c->Base, &loop_state); - debug_program_log(c, "after r500 transform loops"); + rc_unroll_loops(&c->Base, R500_PFS_MAX_INST); + debug_program_log(c, "after unroll loops"); } else{ - rc_transform_unroll_loops(&c->Base, &loop_state); + rc_transform_loops(&c->Base, &loop_state, R300_PFS_MAX_ALU_INST); debug_program_log(c, "after transform loops"); - + rc_emulate_branches(&c->Base); debug_program_log(c, "after emulate branches"); } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index d347b4df9c..fe34ff67cd 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -633,7 +633,7 @@ static struct rc_swizzle_caps r300_vertprog_swizzle_caps = { void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) { struct emulate_loop_state loop_state; - + compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; addArtificialOutputs(compiler); @@ -643,14 +643,13 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) /* XXX Ideally this should be done only for r3xx, but since * we don't have branching support for r5xx, we use the emulation * on all chipsets. */ - rc_transform_unroll_loops(&compiler->Base, &loop_state); - - debug_program_log(compiler, "after transform loops"); - + if (compiler->Base.is_r500){ + rc_transform_loops(&compiler->Base, &loop_state, R500_VS_MAX_ALU); rc_emulate_loops(&loop_state, R500_VS_MAX_ALU); } else { rc_emulate_loops(&loop_state, R300_VS_MAX_ALU); + rc_transform_loops(&compiler->Base, &loop_state, R300_VS_MAX_ALU); } debug_program_log(compiler, "after emulate loops"); diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index e6b5522c5b..95be619d5d 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -60,31 +60,6 @@ int r500_transform_IF( return 1; } -/** - * Rewrite loops to make them easier to emit. This is not a local - * transformation, because it modifies and reorders an entire block of code. - */ -void r500_transform_unroll_loops(struct radeon_compiler * c, - struct emulate_loop_state *s) -{ - int i; - - rc_transform_unroll_loops(c, s); - - for( i = s->LoopCount - 1; i >= 0; i-- ){ - struct rc_instruction * inst_continue; - if(!s->Loops[i].EndLoop){ - continue; - } - /* Insert a continue instruction at the end of the loop. This - * is required in order to emit loops correctly. */ - inst_continue = rc_insert_new_instruction(c, - s->Loops[i].EndIf->Prev); - inst_continue->U.I.Opcode = RC_OPCODE_CONTINUE; - } - -} - static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) { unsigned int relevant; diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h index 0d005a794f..34173351f8 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h @@ -49,6 +49,4 @@ extern int r500_transform_IF( struct rc_instruction * inst, void* data); -void r500_transform_unroll_loops(struct radeon_compiler * c, - struct emulate_loop_state * s); #endif diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index 0bd8f0a239..c3f817ad4e 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -64,7 +64,12 @@ struct branch_info { }; struct loop_info { - int LoopStart; + int BgnLoop; + + int BranchDepth; + int * Brks; + int BrkCount; + int BrkReserved; }; struct emit_state { @@ -368,6 +373,12 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst unsigned int newip = ++s->Code->inst_end; + /* Currently all loops use the same integer constant to intialize + * the loop variables. */ + if(!s->Code->int_constants[0]) { + s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff); + s->Code->int_constant_count = 1; + } s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT; switch(inst->U.I.Opcode){ @@ -378,32 +389,69 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1); loop = &s->Loops[s->CurrentLoopDepth++]; - - /* We don't emit an instruction for BGNLOOP, so we need to - * decrement the instruction counter, but first we need to - * set LoopStart to the current value of inst_end, which - * will end up being the first real instruction in the loop.*/ - loop->LoopStart = s->Code->inst_end--; + memset(loop, 0, sizeof(struct loop_info)); + loop->BranchDepth = s->CurrentBranchDepth; + loop->BgnLoop = newip; + + s->Code->inst[newip].inst2 = R500_FC_OP_LOOP + | R500_FC_JUMP_FUNC(0x00) + | R500_FC_IGNORE_UNCOVERED + ; break; - case RC_OPCODE_BRK: - /* Don't emit an instruction for BRK */ - s->Code->inst_end--; + loop = &s->Loops[s->CurrentLoopDepth - 1]; + memory_pool_array_reserve(&s->C->Pool, int, loop->Brks, + loop->BrkCount, loop->BrkReserved, 1); + + loop->Brks[loop->BrkCount++] = newip; + s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_B_OP1_DECR + | R500_FC_B_POP_CNT( + s->CurrentBranchDepth - loop->BranchDepth) + | R500_FC_IGNORE_UNCOVERED + ; break; case RC_OPCODE_CONTINUE: loop = &s->Loops[s->CurrentLoopDepth - 1]; - s->Code->inst[newip].inst2 = R500_FC_OP_JUMP | - R500_FC_JUMP_FUNC(0xff); - s->Code->inst[newip].inst3 = R500_FC_JUMP_ADDR(loop->LoopStart); + s->Code->inst[newip].inst2 = R500_FC_OP_JUMP + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_B_OP1_DECR + | R500_FC_B_POP_CNT( + s->CurrentBranchDepth - loop->BranchDepth) + ; + s->Code->inst[newip].inst3 = R500_FC_JUMP_ADDR(loop->BgnLoop); break; case RC_OPCODE_ENDLOOP: - /* Don't emit an instruction for ENDLOOP */ - s->Code->inst_end--; + { + unsigned int i; + loop = &s->Loops[s->CurrentLoopDepth - 1]; + /* Emit ENDLOOP */ + s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_JUMP_ANY + | R500_FC_IGNORE_UNCOVERED + ; + /* The constant integer at index 0 is used by all loops. */ + s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0) + | R500_FC_JUMP_ADDR(loop->BgnLoop + 1) + ; + + /* Set jump address and int constant for BGNLOOP */ + s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0) + | R500_FC_JUMP_ADDR(newip) + ; + + /* Set jump address for the BRK instructions. */ + while(loop->BrkCount--) { + s->Code->inst[loop->Brks[loop->BrkCount]].inst3 = + R500_FC_JUMP_ADDR(newip + 1); + } s->CurrentLoopDepth--; break; - + } case RC_OPCODE_IF: if ( s->CurrentBranchDepth >= MAX_BRANCH_DEPTH_FULL) { rc_error(s->C, "Branch depth exceeds hardware limit"); @@ -442,24 +490,16 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst } branch = &s->Branches[s->CurrentBranchDepth - 1]; - - if(inst->Prev->U.I.Opcode == RC_OPCODE_BRK){ - branch->Endif = --s->Code->inst_end; - s->Code->inst[branch->Endif].inst2 |= - R500_FC_B_OP0_DECR; - } - else{ - branch->Endif = newip; - - s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP - | R500_FC_A_OP_NONE /* no address stack */ - | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */ - | R500_FC_B_OP0_DECR /* decrement branch counter if stay */ - | R500_FC_B_OP1_NONE /* no branch counter if stay */ - | R500_FC_B_POP_CNT(1) + branch->Endif = newip; + + s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */ + | R500_FC_B_OP0_DECR /* decrement branch counter if stay */ + | R500_FC_B_OP1_NONE /* no branch counter if stay */ + | R500_FC_B_POP_CNT(1) ; - s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); - } + s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP | R500_FC_A_OP_NONE /* no address stack */ | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */ @@ -544,11 +584,9 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; } - /* Use FULL flow control mode if branches are nested deep enough. - * We don not need to enable FULL flow control mode for loops, becasue - * we aren't using the hardware loop instructions. - */ - if (s.MaxBranchDepth >= 4) { + /* Enable full flow control mode if we are using loops or have if + * statements nested at least four deep. */ + if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) { if (code->max_temp_idx < 1) code->max_temp_idx = 1; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index d03689763b..e14a3520dd 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -221,6 +221,9 @@ struct r500_fragment_program_code { int max_temp_idx; uint32_t us_fc_ctrl; + + uint32_t int_constants[32]; + uint32_t int_constant_count; }; struct rX00_fragment_program_code { diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c index fbb4235c22..31566a937f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c @@ -43,6 +43,12 @@ struct instruction_state { unsigned char SrcReg[3]; }; +struct loopinfo { + struct updatemask_state * Breaks; + unsigned int BreakCount; + unsigned int BreaksReserved; +}; + struct branchinfo { unsigned int HaveElse:1; @@ -59,6 +65,10 @@ struct deadcode_state { struct branchinfo * BranchStack; unsigned int BranchStackSize; unsigned int BranchStackReserved; + + struct loopinfo * LoopStack; + unsigned int LoopStackSize; + unsigned int LoopStackReserved; }; @@ -78,6 +88,22 @@ static void or_updatemasks( dst->Address = a->Address | b->Address; } +static void push_break(struct deadcode_state *s) +{ + struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1]; + memory_pool_array_reserve(&s->C->Pool, struct updatemask_state, + loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1); + + memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R)); +} + +static void push_loop(struct deadcode_state * s) +{ + memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack, + s->LoopStackSize, s->LoopStackReserved, 1); + memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo)); +} + static void push_branch(struct deadcode_state * s) { memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack, @@ -233,11 +259,22 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f } } } + push_loop(&s); break; } - case RC_OPCODE_CONTINUE: case RC_OPCODE_BRK: + push_break(&s); + break; case RC_OPCODE_BGNLOOP: + { + unsigned int i; + struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1]; + for(i = 0; i < loop->BreakCount; i++) { + or_updatemasks(&s.R, &s.R, &loop->Breaks[i]); + } + break; + } + case RC_OPCODE_CONTINUE: break; case RC_OPCODE_ENDIF: push_branch(&s); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c index fed4d8829a..94e3e5f4f5 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c @@ -39,7 +39,6 @@ #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) struct const_value { - struct radeon_compiler * C; struct rc_src_register * Src; float Value; @@ -78,17 +77,17 @@ static int src_reg_is_immediate(struct rc_src_register * src, c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE; } -static unsigned int loop_calc_iterations(struct emulate_loop_state *s, - struct loop_info * loop, unsigned int max_instructions) +static unsigned int loop_max_possible_iterations(struct radeon_compiler *c, + struct loop_info * loop, unsigned int prog_inst_limit) { - unsigned int total_i = rc_recompute_ips(s->C); + unsigned int total_i = rc_recompute_ips(c); unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1; /* +1 because the program already has one iteration of the loop. */ - return 1 + ((max_instructions - total_i) / (s->LoopCount * loop_i)); + return 1 + ((prog_inst_limit - total_i) / loop_i); } -static void loop_unroll(struct emulate_loop_state * s, - struct loop_info *loop, unsigned int iterations) +static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop, + unsigned int iterations) { unsigned int i; struct rc_instruction * ptr; @@ -99,7 +98,7 @@ static void loop_unroll(struct emulate_loop_state * s, rc_remove_instruction(loop->EndLoop); for( i = 1; i < iterations; i++){ for(ptr = first; ptr != last->Next; ptr = ptr->Next){ - struct rc_instruction *new = rc_alloc_instruction(s->C); + struct rc_instruction *new = rc_alloc_instruction(c); memcpy(new, ptr, sizeof(struct rc_instruction)); rc_insert_instruction(append_to, new); append_to = new; @@ -115,7 +114,7 @@ static void update_const_value(void * data, struct rc_instruction * inst, if(value->Src->File != file || value->Src->Index != index || !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){ - return; + return; } switch(inst->U.I.Opcode){ case RC_OPCODE_MOV: @@ -140,7 +139,7 @@ static void get_incr_amount(void * data, struct rc_instruction * inst, if(file != RC_FILE_TEMPORARY || count_inst->Index != index || (1 << GET_SWZ(count_inst->Swz,0) != mask)){ - return; + return; } /* Find the index of the counter register. */ opcode = rc_get_opcode_info(inst->U.I.Opcode); @@ -185,11 +184,10 @@ static void get_incr_amount(void * data, struct rc_instruction * inst, count_inst->Unknown = 1; return; } - } -static int transform_const_loop(struct emulate_loop_state * s, - struct loop_info * loop) +static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop, + unsigned int prog_inst_limit) { int end_loops; int iterations; @@ -201,12 +199,12 @@ static int transform_const_loop(struct emulate_loop_state * s, struct rc_instruction * inst; /* Find the counter and the upper limit */ - - if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], s->C)){ + + if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], c)){ limit = &loop->Cond->U.I.SrcReg[0]; counter = &loop->Cond->U.I.SrcReg[1]; } - else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], s->C)){ + else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], c)){ limit = &loop->Cond->U.I.SrcReg[1]; counter = &loop->Cond->U.I.SrcReg[0]; } @@ -214,13 +212,13 @@ static int transform_const_loop(struct emulate_loop_state * s, DBG("No constant limit.\n"); return 0; } - + /* Find the initial value of the counter */ counter_value.Src = counter; counter_value.Value = 0.0f; counter_value.HasValue = 0; - counter_value.C = s->C; - for(inst = s->C->Program.Instructions.Next; inst != loop->BeginLoop; + counter_value.C = c; + for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop; inst = inst->Next){ rc_for_all_writes_mask(inst, update_const_value, &counter_value); } @@ -230,7 +228,7 @@ static int transform_const_loop(struct emulate_loop_state * s, } DBG("Initial counter value is %f\n", counter_value.Value); /* Determine how the counter is modified each loop */ - count_inst.C = s->C; + count_inst.C = c; count_inst.Index = counter->Index; count_inst.Swz = counter->Swizzle; count_inst.Amount = 0.0f; @@ -277,17 +275,20 @@ static int transform_const_loop(struct emulate_loop_state * s, /* Calculate the number of iterations of this loop. Keeping this * simple, since we only support increment and decrement loops. */ - limit_value = get_constant_value(s->C, limit, 0); + limit_value = get_constant_value(c, limit, 0); DBG("Limit is %f.\n", limit_value); + /* The iteration calculations are opposite of what you would expect. + * In a normal loop, if the condition is met, then loop continues, but + * with our loops, if the condition is met, the is exited. */ switch(loop->Cond->U.I.Opcode){ - case RC_OPCODE_SGT: - case RC_OPCODE_SLT: + case RC_OPCODE_SGE: + case RC_OPCODE_SLE: iterations = (int) ceilf((limit_value - counter_value.Value) / count_inst.Amount); break; - case RC_OPCODE_SLE: - case RC_OPCODE_SGE: + case RC_OPCODE_SGT: + case RC_OPCODE_SLT: iterations = (int) floorf((limit_value - counter_value.Value) / count_inst.Amount) + 1; break; @@ -295,77 +296,84 @@ static int transform_const_loop(struct emulate_loop_state * s, return 0; } + if (iterations > loop_max_possible_iterations(c, loop, + prog_inst_limit)) { + return 0; + } + DBG("Loop will have %d iterations.\n", iterations); - + /* Prepare loop for unrolling */ rc_remove_instruction(loop->Cond); rc_remove_instruction(loop->If); rc_remove_instruction(loop->Brk); rc_remove_instruction(loop->EndIf); - - loop_unroll(s, loop, iterations); + + unroll_loop(c, loop, iterations); loop->EndLoop = NULL; return 1; } -/** - * This function prepares a loop to be unrolled by converting it into an if - * statement. Here is an outline of the conversion process: - * BGNLOOP; -> BGNLOOP; - * -> - * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2]; - * IF temp[0]; -> IF temp[0]; - * BRK; -> - * ENDIF; -> - * -> ENDIF; - * ENDLOOP; -> ENDLOOP - * +/** + * @param c + * @param loop * @param inst A pointer to a BGNLOOP instruction. - * @return If the loop can be unrolled, a pointer to the first instruction of - * the unrolled loop. - * Otherwise, A pointer to the ENDLOOP instruction. - * Null if there is an error. + * @return 1 if all of the members of loop where set. + * @return 0 if there was an error and some members of loop are still NULL. */ -static struct rc_instruction * transform_loop(struct emulate_loop_state * s, +static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop, struct rc_instruction * inst) { - struct loop_info *loop; struct rc_instruction * ptr; - memory_pool_array_reserve(&s->C->Pool, struct loop_info, - s->Loops, s->LoopCount, s->LoopReserved, 1); - - loop = &s->Loops[s->LoopCount++]; - memset(loop, 0, sizeof(struct loop_info)); if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){ - rc_error(s->C, "expected BGNLOOP\n", __FUNCTION__); - return NULL; + rc_error(c, "%s: expected BGNLOOP", __FUNCTION__); + return 0; } + + memset(loop, 0, sizeof(struct loop_info)); + loop->BeginLoop = inst; - for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next){ + for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) { + + if (ptr == &c->Program.Instructions) { + rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n", + __FUNCTION__); + return 0; + } + switch(ptr->U.I.Opcode){ case RC_OPCODE_BGNLOOP: - /* Nested loop */ - ptr = transform_loop(s, ptr); - if(!ptr){ - return NULL; + { + /* Nested loop, skip ahead to the end. */ + unsigned int loop_depth = 1; + for(ptr = ptr->Next; ptr != &c->Program.Instructions; + ptr = ptr->Next){ + if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) { + loop_depth++; + } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) { + if (!--loop_depth) { + break; + } + } + } + if (ptr == &c->Program.Instructions) { + rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n", + __FUNCTION__); + return 0; } break; + } case RC_OPCODE_BRK: - loop->Brk = ptr; - if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF){ - rc_error(s->C, - "%s: expected ENDIF\n",__FUNCTION__); - return NULL; - } - loop->EndIf = ptr->Next; - if(ptr->Prev->U.I.Opcode != RC_OPCODE_IF){ - rc_error(s->C, - "%s: expected IF\n", __FUNCTION__); - return NULL; + if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF + || ptr->Prev->U.I.Opcode != RC_OPCODE_IF + || loop->Brk){ + continue; } + loop->Brk = ptr; loop->If = ptr->Prev; + loop->EndIf = ptr->Next; switch(loop->If->Prev->U.I.Opcode){ case RC_OPCODE_SLT: case RC_OPCODE_SGE: @@ -375,18 +383,62 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s, case RC_OPCODE_SNE: break; default: - rc_error(s->C, "%s expected conditional\n", + rc_error(c, "%s: expected conditional", __FUNCTION__); - return NULL; + return 0; } loop->Cond = loop->If->Prev; - ptr = loop->EndIf; break; + case RC_OPCODE_ENDLOOP: loop->EndLoop = ptr; break; } } + + if (loop->BeginLoop && loop->Brk && loop->If && loop->EndIf + && loop->Cond && loop->EndLoop) { + return 1; + } + return 0; +} + +/** + * This function prepares a loop to be unrolled by converting it into an if + * statement. Here is an outline of the conversion process: + * BGNLOOP; -> BGNLOOP; + * -> + * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2]; + * IF temp[0]; -> IF temp[0]; + * BRK; -> + * ENDIF; -> + * -> ENDIF; + * ENDLOOP; -> ENDLOOP + * + * @param inst A pointer to a BGNLOOP instruction. + * @return If the loop can be unrolled, a pointer to the first instruction of + * the unrolled loop. + * Otherwise, A pointer to the ENDLOOP instruction. + * Null if there is an error. + */ +static struct rc_instruction * transform_loop(struct emulate_loop_state * s, + struct rc_instruction * inst, + int prog_inst_limit) +{ + struct loop_info * loop; + + memory_pool_array_reserve(&s->C->Pool, struct loop_info, + s->Loops, s->LoopCount, s->LoopReserved, 1); + + loop = &s->Loops[s->LoopCount++]; + + if (!build_loop_info(s->C, loop, inst)) + return NULL; + + if(try_unroll_loop(s->C, loop, prog_inst_limit)){ + return loop->BeginLoop->Next; + } + /* Reverse the conditional instruction */ switch(loop->Cond->U.I.Opcode){ case RC_OPCODE_SGE: @@ -411,31 +463,27 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s, rc_error(s->C, "loop->Cond is not a conditional.\n"); return NULL; } - - /* Check if the number of loops is known at compile time. */ - if(transform_const_loop(s, loop)){ - return loop->BeginLoop->Next; - } - /* Prepare the loop to be unrolled */ + /* Prepare the loop to be emulated */ rc_remove_instruction(loop->Brk); rc_remove_instruction(loop->EndIf); rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf); return loop->EndLoop; } -void rc_transform_unroll_loops(struct radeon_compiler *c, - struct emulate_loop_state * s) +void rc_transform_loops(struct radeon_compiler *c, + struct emulate_loop_state * s, + int prog_inst_limit) { struct rc_instruction * ptr; - + memset(s, 0, sizeof(struct emulate_loop_state)); s->C = c; ptr = s->C->Program.Instructions.Next; while(ptr != &s->C->Program.Instructions) { if(ptr->Type == RC_INSTRUCTION_NORMAL && ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ - ptr = transform_loop(s, ptr); + ptr = transform_loop(s, ptr, prog_inst_limit); if(!ptr){ return; } @@ -444,8 +492,23 @@ void rc_transform_unroll_loops(struct radeon_compiler *c, } } -void rc_emulate_loops(struct emulate_loop_state *s, - unsigned int max_instructions) +void rc_unroll_loops(struct radeon_compiler *c, int prog_inst_limit) +{ + struct rc_instruction * inst; + struct loop_info loop; + + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + + if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { + if (build_loop_info(c, &loop, inst)) { + try_unroll_loop(c, &loop, prog_inst_limit); + } + } + } +} + +void rc_emulate_loops(struct emulate_loop_state *s, int prog_inst_limit) { int i; /* Iterate backwards of the list of loops so that loops that nested @@ -455,8 +518,8 @@ void rc_emulate_loops(struct emulate_loop_state *s, if(!s->Loops[i].EndLoop){ continue; } - unsigned int iterations = loop_calc_iterations(s, &s->Loops[i], - max_instructions); - loop_unroll(s, &s->Loops[i], iterations); + unsigned int iterations = loop_max_possible_iterations( + s->C, &s->Loops[i], prog_inst_limit); + unroll_loop(s->C, &s->Loops[i], iterations); } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h index 7748813c4e..339527ba3b 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h @@ -23,10 +23,11 @@ struct emulate_loop_state { unsigned int LoopReserved; }; -void rc_transform_unroll_loops(struct radeon_compiler *c, - struct emulate_loop_state * s); +void rc_transform_loops(struct radeon_compiler *c, + struct emulate_loop_state * s, int prog_inst_limit); -void rc_emulate_loops(struct emulate_loop_state *s, - unsigned int max_instructions); +void rc_unroll_loops(struct radeon_compiler * c, int prog_inst_limit); + +void rc_emulate_loops(struct emulate_loop_state * s, int prog_inst_limit); #endif /* RADEON_EMULATE_LOOPS_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index eca0651536..7a3f35950a 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -164,7 +164,8 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo inst = inst->Next) { /* XXX In the future we might be able to make the optimizer * smart enough to handle loops. */ - if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP){ + if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP + || inst->U.I.Opcode == RC_OPCODE_ENDLOOP){ return; } rc_for_all_reads_mask(inst, peephole_scan_read, &s); -- cgit v1.2.3 From 9a78e790dc4c40362b971ad5eff2505c02b73ed7 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 4 Aug 2010 16:10:11 -0400 Subject: r600g: always perform texture perspective divide + fix blending quake3 engine seems to run fine at this point (ioquake) Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_shader.c | 121 +++++++++++++------------- src/gallium/drivers/r600/r600_state.c | 5 +- src/gallium/drivers/r600/r600_state_inlines.h | 2 +- src/gallium/drivers/r600/r600d.h | 25 ++++++ 4 files changed, 88 insertions(+), 65 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 43b3e40fad..8da102cde0 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -963,68 +963,66 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; /* Add perspective divide */ - if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_TXP) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE; - alu.src[0].sel = src_gpr; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 3; - alu.last = 1; - alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE; + alu.src[0].sel = src_gpr; + alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 3; + alu.last = 1; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 3; - alu.src[1].sel = src_gpr; - alu.src[1].chan = tgsi_chan(&inst->Src[0], 0); - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 0; - alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 3; - alu.src[1].sel = src_gpr; - alu.src[1].chan = tgsi_chan(&inst->Src[0], 1); - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 1; - alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 3; - alu.src[1].sel = src_gpr; - alu.src[1].chan = tgsi_chan(&inst->Src[0], 2); - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 2; - alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; - alu.src[0].sel = 249; - alu.src[0].chan = 0; - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 3; - alu.last = 1; - alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - src_gpr = ctx->temp_reg; - } + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 3; + alu.src[1].sel = src_gpr; + alu.src[1].chan = tgsi_chan(&inst->Src[0], 0); + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 0; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 3; + alu.src[1].sel = src_gpr; + alu.src[1].chan = tgsi_chan(&inst->Src[0], 1); + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 1; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 3; + alu.src[1].sel = src_gpr; + alu.src[1].chan = tgsi_chan(&inst->Src[0], 2); + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 2; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.src[0].sel = 249; + alu.src[0].chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 3; + alu.last = 1; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + src_gpr = ctx->temp_reg; /* TODO use temp if src_gpr is not a temporary reg (File != TEMPORARY) */ memset(&tex, 0, sizeof(struct r600_bc_tex)); @@ -1041,6 +1039,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.src_sel_y = 1; tex.src_sel_z = 2; tex.src_sel_w = 3; + if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { tex.coord_type_x = 1; tex.coord_type_y = 1; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 72c0ac5dd1..e8a591f73e 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -628,7 +628,6 @@ static struct radeon_state *r600_blend(struct r600_context *rctx) rstate->states[R600_BLEND__CB_BLEND_CONTROL] = 0x00000000; for (i = 0; i < 8; i++) { - unsigned eqRGB = state->rt[i].rgb_func; unsigned srcRGB = state->rt[i].rgb_src_factor; unsigned dstRGB = state->rt[i].rgb_dst_factor; @@ -1202,7 +1201,7 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) int i; target_mask = 0; - color_control = 0; + color_control = S_028808_PER_MRT_BLEND(1); if (pbs->logicop_enable) { color_control |= (pbs->logicop_func) << 16; @@ -1212,7 +1211,7 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) target_mask |= (pbs->rt[0].colormask); for (i = 0; i < 8; i++) { if (pbs->rt[i].blend_enable) { - color_control |= (1 << (8 + i)); + color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); target_mask |= (pbs->rt[0].colormask << (4 * i)); } } diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 26a5dd0432..321e75d7a1 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -48,7 +48,7 @@ static INLINE uint32_t r600_translate_blend_factor(int blend_fact) { switch (blend_fact) { case PIPE_BLENDFACTOR_ONE: - return V_028804_BLEND_ZERO; + return V_028804_BLEND_ONE; case PIPE_BLENDFACTOR_SRC_COLOR: return V_028804_BLEND_SRC_COLOR; case PIPE_BLENDFACTOR_SRC_ALPHA: diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 24cb8593f7..8205bdeadc 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -315,6 +315,31 @@ #define S_028800_STENCILZFAIL_BF(x) (((x) & 0x7) << 29) #define G_028800_STENCILZFAIL_BF(x) (((x) >> 29) & 0x7) #define C_028800_STENCILZFAIL_BF 0x1FFFFFFF +#define R_028808_CB_COLOR_CONTROL 0x028808 +#define S_028808_FOG_ENABLE(x) (((x) & 0x1) << 0) +#define G_028808_FOG_ENABLE(x) (((x) >> 0) & 0x1) +#define C_028808_FOG_ENABLE 0xFFFFFFFE +#define S_028808_MULTIWRITE_ENABLE(x) (((x) & 0x1) << 1) +#define G_028808_MULTIWRITE_ENABLE(x) (((x) >> 1) & 0x1) +#define C_028808_MULTIWRITE_ENABLE 0xFFFFFFFD +#define S_028808_DITHER_ENABLE(x) (((x) & 0x1) << 2) +#define G_028808_DITHER_ENABLE(x) (((x) >> 2) & 0x1) +#define C_028808_DITHER_ENABLE 0xFFFFFFFB +#define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3) +#define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1) +#define C_028808_DEGAMMA_ENABLE 0xFFFFFFF7 +#define S_028808_SPECIAL_OP(x) (((x) & 0x7) << 4) +#define G_028808_SPECIAL_OP(x) (((x) >> 4) & 0x7) +#define C_028808_SPECIAL_OP 0xFFFFFF8F +#define S_028808_PER_MRT_BLEND(x) (((x) & 0x1) << 7) +#define G_028808_PER_MRT_BLEND(x) (((x) >> 7) & 0x1) +#define C_028808_PER_MRT_BLEND 0xFFFFFF7F +#define S_028808_TARGET_BLEND_ENABLE(x) (((x) & 0xFF) << 8) +#define G_028808_TARGET_BLEND_ENABLE(x) (((x) >> 8) & 0xFF) +#define C_028808_TARGET_BLEND_ENABLE 0xFFFF00FF +#define S_028808_ROP3(x) (((x) & 0xFF) << 16) +#define G_028808_ROP3(x) (((x) >> 16) & 0xFF) +#define C_028808_ROP3 0xFF00FFFF #define R_028010_DB_DEPTH_INFO 0x028010 #define S_028010_FORMAT(x) (((x) & 0x7) << 0) #define G_028010_FORMAT(x) (((x) >> 0) & 0x7) -- cgit v1.2.3 From 12dac449fba5fe17087fd48ac959a5388875b2d3 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 4 Aug 2010 16:28:33 -0400 Subject: r600g: force flush on map as temporary fix to readpixel Should allow more piglit test to pass. Need to plugin proper flushing. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_texture.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 3725bf8560..033c71f6ef 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -234,6 +234,8 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, struct r600_screen *rscreen = r600_screen(ctx->screen); char *map; + r600_flush(ctx, 0, NULL); + resource = (struct r600_resource *)transfer->resource; if (radeon_bo_map(rscreen->rw, resource->bo)) { return NULL; -- cgit v1.2.3 From 6eb2a7fbafd49e75b6cbbee57f23dda63eff73ef Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 7 Jul 2010 15:20:19 +0200 Subject: r300g: implement hyper-z support. (v4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This implements fast Z clear, Z compression, and HiZ support for r300->r500 GPUs. It also allows cbzb clears when fast Z clears are being used for the ZB. It requires a kernel with hyper-z support. Thanks to Marek Olšák , who started this off, and Alex Deucher at AMD for providing lots of hints. v2: squashed zmask ram size fix] squashed r300g/blitter: fix Z readback when compressed] v3: rebase around texture changes in master - .1 fix more bits v4: migrated to using u_mm in r300_texture to manage hiz/zmask rams consistently disabled HiZ when using OQ flush z-cache before turning hyper-z off update hyper-z state on dsa state change store depthclearvalue across cbzb clears and replace it afterwards. Signed-off-by: Dave Airlie --- src/gallium/auxiliary/util/u_blitter.c | 44 +++++ src/gallium/auxiliary/util/u_blitter.h | 2 + src/gallium/drivers/r300/r300_blit.c | 69 +++++++- src/gallium/drivers/r300/r300_chipset.c | 54 +++++- src/gallium/drivers/r300/r300_chipset.h | 14 +- src/gallium/drivers/r300/r300_context.c | 38 ++++- src/gallium/drivers/r300/r300_context.h | 31 +++- src/gallium/drivers/r300/r300_debug.c | 1 + src/gallium/drivers/r300/r300_emit.c | 142 ++++++++++++++- src/gallium/drivers/r300/r300_emit.h | 3 + src/gallium/drivers/r300/r300_flush.c | 3 +- src/gallium/drivers/r300/r300_hyperz.c | 237 +++++++++++++++++++++++++- src/gallium/drivers/r300/r300_hyperz.h | 5 + src/gallium/drivers/r300/r300_reg.h | 1 + src/gallium/drivers/r300/r300_render.c | 3 +- src/gallium/drivers/r300/r300_screen.h | 1 + src/gallium/drivers/r300/r300_state.c | 71 ++++++-- src/gallium/drivers/r300/r300_state_derived.c | 4 +- src/gallium/drivers/r300/r300_texture.c | 9 + src/gallium/drivers/r300/r300_winsys.h | 1 + src/gallium/winsys/radeon/drm/radeon_drm.c | 10 ++ src/gallium/winsys/radeon/drm/radeon_r300.c | 2 + src/gallium/winsys/radeon/drm/radeon_winsys.h | 3 + 23 files changed, 701 insertions(+), 47 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 0d94aaae95..b5b86b7214 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -87,6 +87,7 @@ struct blitter_context_priv void *dsa_write_depth_keep_stencil; void *dsa_keep_depth_stencil; void *dsa_keep_depth_write_stencil; + void *dsa_flush_depth_stencil; void *velem_state; @@ -156,6 +157,10 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) ctx->dsa_keep_depth_stencil = pipe->create_depth_stencil_alpha_state(pipe, &dsa); + dsa.depth.writemask = 1; + ctx->dsa_flush_depth_stencil = + pipe->create_depth_stencil_alpha_state(pipe, &dsa); + dsa.depth.enabled = 1; dsa.depth.writemask = 1; dsa.depth.func = PIPE_FUNC_ALWAYS; @@ -940,3 +945,42 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, UTIL_BLITTER_ATTRIB_NONE, NULL); blitter_restore_CSOs(ctx); } + +/* Clear a region of a depth stencil surface. */ +void util_blitter_flush_depth_stencil(struct blitter_context *blitter, + struct pipe_surface *dstsurf) +{ + struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; + struct pipe_context *pipe = ctx->base.pipe; + struct pipe_framebuffer_state fb_state; + + assert(dstsurf->texture); + if (!dstsurf->texture) + return; + + /* check the saved state */ + blitter_check_saved_CSOs(ctx); + assert(blitter->saved_fb_state.nr_cbufs != ~0); + + /* bind CSOs */ + pipe->bind_blend_state(pipe, ctx->blend_keep_color); + pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil); + + pipe->bind_rasterizer_state(pipe, ctx->rs_state); + pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0)); + pipe->bind_vs_state(pipe, ctx->vs_col); + pipe->bind_vertex_elements_state(pipe, ctx->velem_state); + + /* set a framebuffer state */ + fb_state.width = dstsurf->width; + fb_state.height = dstsurf->height; + fb_state.nr_cbufs = 0; + fb_state.cbufs[0] = 0; + fb_state.zsbuf = dstsurf; + pipe->set_framebuffer_state(pipe, &fb_state); + + blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height); + blitter->draw_rectangle(blitter, 0, 0, dstsurf->width, dstsurf->height, 0, + UTIL_BLITTER_ATTRIB_NONE, NULL); + blitter_restore_CSOs(ctx); +} diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index ba3f92eca8..f316587dea 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -200,6 +200,8 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, unsigned dstx, unsigned dsty, unsigned width, unsigned height); +void util_blitter_flush_depth_stencil(struct blitter_context *blitter, + struct pipe_surface *dstsurf); /* The functions below should be used to save currently bound constant state * objects inside a driver. The objects are automatically restored at the end * of the util_blitter_{clear, copy_region, fill_region} functions and then diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index d125196b6d..6f8d9abfc8 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -22,6 +22,7 @@ #include "r300_context.h" #include "r300_texture.h" +#include "r300_winsys.h" #include "util/u_format.h" #include "util/u_pack_color.h" @@ -81,7 +82,7 @@ static void r300_blitter_end(struct r300_context *r300) } static uint32_t r300_depth_clear_cb_value(enum pipe_format format, - const float* rgba) + const float* rgba) { union util_color uc; util_pack_color(rgba, format, &uc); @@ -98,6 +99,9 @@ static boolean r300_cbzb_clear_allowed(struct r300_context *r300, struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; + if (r300->z_fastfill) + clear_buffers &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL); + /* Only color clear allowed, and only one colorbuffer. */ if (clear_buffers != PIPE_CLEAR_COLOR || fb->nr_cbufs != 1) return FALSE; @@ -105,6 +109,23 @@ static boolean r300_cbzb_clear_allowed(struct r300_context *r300, return r300_surface(fb->cbufs[0])->cbzb_allowed; } +static uint32_t r300_depth_clear_value(enum pipe_format format, + double depth, unsigned stencil) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + return util_pack_z(format, depth); + + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return util_pack_z_stencil(format, depth, stencil); + + default: + assert(0); + return 0; + } +} + /* Clear currently bound buffers. */ static void r300_clear(struct pipe_context* pipe, unsigned buffers, @@ -154,6 +175,22 @@ static void r300_clear(struct pipe_context* pipe, (struct r300_hyperz_state*)r300->hyperz_state.state; uint32_t width = fb->width; uint32_t height = fb->height; + boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); + uint32_t hyperz_dcv = 0; + + /* Enable fast Z clear. + * The zbuffer must be in micro-tiled mode, otherwise it locks up. */ + if ((buffers & (PIPE_CLEAR_DEPTH|PIPE_CLEAR_STENCIL)) && has_hyperz) { + + hyperz_dcv = hyperz->zb_depthclearvalue = + r300_depth_clear_value(fb->zsbuf->format, depth, stencil); + + r300_mark_fb_state_dirty(r300, R300_CHANGED_ZCLEAR_FLAG); + if (r300->z_compression || r300->z_fastfill) + r300->zmask_clear.dirty = TRUE; + if (r300->hiz_enable) + r300->hiz_clear.dirty = TRUE; + } /* Enable CBZB clear. */ if (r300_cbzb_clear_allowed(r300, buffers)) { @@ -181,6 +218,7 @@ static void r300_clear(struct pipe_context* pipe, /* Disable CBZB clear. */ if (r300->cbzb_clear) { r300->cbzb_clear = FALSE; + hyperz->zb_depthclearvalue = hyperz_dcv; r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); } @@ -221,6 +259,29 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe, r300_blitter_end(r300); } +/* Clear a region of a depth stencil surface. */ +static void r300_flush_depth_stencil(struct pipe_context *pipe, + struct pipe_resource *dst, + struct pipe_subresource subdst) +{ + struct r300_context *r300 = r300_context(pipe); + struct pipe_surface *dstsurf; + struct r300_texture *tex = r300_texture(dst); + + /* only flush the zmask if we have one attached to this texture */ + if (!tex->zmask_mem[subdst.level]) + return; + + dstsurf = pipe->screen->get_tex_surface(pipe->screen, dst, + subdst.face, subdst.level, 0, + PIPE_BIND_DEPTH_STENCIL); + r300->z_decomp_rd = TRUE; + r300_blitter_begin(r300, R300_CLEAR_SURFACE); + util_blitter_flush_depth_stencil(r300->blitter, dstsurf); + r300_blitter_end(r300); + r300->z_decomp_rd = FALSE; +} + /* Copy a block of pixels from one surface to another using HW. */ static void r300_hw_copy_region(struct pipe_context* pipe, struct pipe_resource *dst, @@ -252,7 +313,7 @@ static void r300_resource_copy_region(struct pipe_context *pipe, { enum pipe_format old_format = dst->format; enum pipe_format new_format = old_format; - + boolean is_depth; if (!pipe->screen->is_format_supported(pipe->screen, old_format, src->target, src->nr_samples, @@ -279,6 +340,10 @@ static void r300_resource_copy_region(struct pipe_context *pipe, } } + is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; + if (is_depth) { + r300_flush_depth_stencil(pipe, src, subsrc); + } if (old_format != new_format) { dst->format = new_format; src->format = new_format; diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 21f3b9d261..2df25f9c8e 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -36,7 +36,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->num_vert_fpus = 2; caps->num_tex_units = 16; caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE; - caps->has_hiz = TRUE; + caps->hiz_ram = 0; caps->is_r400 = FALSE; caps->is_r500 = FALSE; caps->high_second_pipe = FALSE; @@ -49,6 +49,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R300; caps->high_second_pipe = TRUE; caps->num_vert_fpus = 4; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x4145: @@ -61,6 +63,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R300; caps->high_second_pipe = TRUE; caps->num_vert_fpus = 4; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x4150: @@ -77,8 +81,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4E54: case 0x4E56: caps->family = CHIP_FAMILY_RV350; - caps->has_hiz = FALSE; caps->high_second_pipe = TRUE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x4148: @@ -91,12 +95,16 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R350; caps->high_second_pipe = TRUE; caps->num_vert_fpus = 4; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x4E4A: caps->family = CHIP_FAMILY_R360; caps->high_second_pipe = TRUE; caps->num_vert_fpus = 4; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x5460: @@ -108,8 +116,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5B64: case 0x5B65: caps->family = CHIP_FAMILY_RV370; - caps->has_hiz = FALSE; caps->high_second_pipe = TRUE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x3150: @@ -120,6 +128,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x3E54: caps->family = CHIP_FAMILY_RV380; caps->high_second_pipe = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x4A48: @@ -135,6 +145,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R420; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x5548: @@ -149,6 +161,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R423; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x554C: @@ -161,6 +175,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R430; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x5D4C: @@ -172,6 +188,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R480; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x4B48: @@ -182,6 +200,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R481; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x5E4C: @@ -199,34 +219,36 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RV410; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x5954: case 0x5955: caps->family = CHIP_FAMILY_RS480; - caps->has_hiz = FALSE; caps->has_tcl = FALSE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x5974: case 0x5975: caps->family = CHIP_FAMILY_RS482; - caps->has_hiz = FALSE; caps->has_tcl = FALSE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x5A41: case 0x5A42: caps->family = CHIP_FAMILY_RS400; - caps->has_hiz = FALSE; caps->has_tcl = FALSE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x5A61: case 0x5A62: caps->family = CHIP_FAMILY_RC410; - caps->has_hiz = FALSE; caps->has_tcl = FALSE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x791E: @@ -234,6 +256,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS690; caps->has_tcl = FALSE; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x793F: @@ -242,6 +266,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS600; caps->has_tcl = FALSE; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x796C: @@ -251,6 +277,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS740; caps->has_tcl = FALSE; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7100: @@ -270,6 +298,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R520; caps->num_vert_fpus = 8; caps->is_r500 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7140: @@ -313,6 +343,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RV515; caps->num_vert_fpus = 2; caps->is_r500 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x71C0: @@ -334,6 +366,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RV530; caps->num_vert_fpus = 5; caps->is_r500 = TRUE; + caps->hiz_ram = RV530_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7240: @@ -354,12 +388,16 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R580; caps->num_vert_fpus = 8; caps->is_r500 = TRUE; + caps->hiz_ram = RV530_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7280: caps->family = CHIP_FAMILY_RV570; caps->num_vert_fpus = 8; caps->is_r500 = TRUE; + caps->hiz_ram = RV530_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7281: @@ -376,6 +414,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RV560; caps->num_vert_fpus = 8; caps->is_r500 = TRUE; + caps->hiz_ram = RV530_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; default: diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 65750f54e7..e7ca642b4f 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -25,6 +25,14 @@ #include "pipe/p_compiler.h" +/* these are sizes in dwords */ +#define R300_HIZ_LIMIT 10240 +#define RV530_HIZ_LIMIT 15360 + +/* rv3xx have only one pipe */ +#define PIPE_ZMASK_SIZE 4096 +#define RV3xx_ZMASK_SIZE 5120 + /* Structure containing all the possible information about a specific Radeon * in the R3xx, R4xx, and R5xx families. */ struct r300_capabilities { @@ -42,8 +50,10 @@ struct r300_capabilities { unsigned num_tex_units; /* Whether or not TCL is physically present */ boolean has_tcl; - /* Some chipsets do not have HiZ RAM. */ - boolean has_hiz; + /* Some chipsets do not have HiZ RAM - other have varying amounts . */ + int hiz_ram; + /* some chipsets have zmask ram per pipe some don't */ + int zmask_ram; /* Whether or not this is RV350 or newer, including all r400 and r500 * chipsets. The differences compared to the oldest r300 chips are: * - Blend LTE/GTE thresholds diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index df90359058..0668fbc151 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -30,6 +30,7 @@ #include "r300_cb.h" #include "r300_context.h" #include "r300_emit.h" +#include "r300_hyperz.h" #include "r300_screen.h" #include "r300_screen_buffer.h" #include "r300_winsys.h" @@ -114,6 +115,10 @@ static void r300_destroy_context(struct pipe_context* context) u_upload_destroy(r300->upload_vb); u_upload_destroy(r300->upload_ib); + /* setup hyper-z mm */ + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + r300_hyperz_destroy_mm(r300); + translate_cache_destroy(r300->tran.translate_cache); r300_release_referenced_objects(r300); @@ -166,6 +171,8 @@ static void r300_setup_atoms(struct r300_context* r300) boolean is_r500 = r300->screen->caps.is_r500; boolean has_tcl = r300->screen->caps.has_tcl; boolean drm_2_3_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); + boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); + boolean has_hiz_ram = r300->screen->caps.hiz_ram > 0; /* Create the actual atom list. * @@ -188,8 +195,9 @@ static void r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(gpu_flush, 9); R300_INIT_ATOM(aa_state, 4); R300_INIT_ATOM(fb_state, 0); + if (has_hyperz) + R300_INIT_ATOM(hyperz_state, is_rv350 ? 10 : 8); /* ZB (unpipelined), SC. */ - R300_INIT_ATOM(hyperz_state, 6); R300_INIT_ATOM(ztop_state, 2); /* ZB, FG. */ R300_INIT_ATOM(dsa_state, is_r500 ? 8 : 6); @@ -220,6 +228,13 @@ static void r300_setup_atoms(struct r300_context* r300) /* TX. */ R300_INIT_ATOM(texture_cache_inval, 2); R300_INIT_ATOM(textures_state, 0); + if (has_hyperz) { + /* HiZ Clear */ + if (has_hiz_ram) + R300_INIT_ATOM(hiz_clear, 0); + /* zmask clear */ + R300_INIT_ATOM(zmask_clear, 0); + } /* ZB (unpipelined), SU. */ R300_INIT_ATOM(query_start, 4); @@ -236,7 +251,8 @@ static void r300_setup_atoms(struct r300_context* r300) r300->clip_state.state = CALLOC_STRUCT(r300_clip_state); r300->fb_state.state = CALLOC_STRUCT(pipe_framebuffer_state); r300->gpu_flush.state = CALLOC_STRUCT(pipe_framebuffer_state); - r300->hyperz_state.state = CALLOC_STRUCT(r300_hyperz_state); + if (has_hyperz) + r300->hyperz_state.state = CALLOC_STRUCT(r300_hyperz_state); r300->invariant_state.state = CALLOC_STRUCT(r300_invariant_state); r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block); r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); @@ -282,8 +298,7 @@ static void r300_init_states(struct pipe_context *pipe) (struct r300_vap_invariant_state*)r300->vap_invariant_state.state; struct r300_invariant_state *invariant = (struct r300_invariant_state*)r300->invariant_state.state; - struct r300_hyperz_state *hyperz = - (struct r300_hyperz_state*)r300->hyperz_state.state; + CB_LOCALS; pipe->set_blend_color(pipe, &bc); @@ -350,11 +365,20 @@ static void r300_init_states(struct pipe_context *pipe) } /* Initialize the hyperz state. */ + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) { - BEGIN_CB(&hyperz->cb_begin, r300->hyperz_state.size); + struct r300_hyperz_state *hyperz = + (struct r300_hyperz_state*)r300->hyperz_state.state; + BEGIN_CB(&hyperz->cb_flush_begin, r300->hyperz_state.size); + OUT_CB_REG(R300_ZB_ZCACHE_CTLSTAT, + R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE); OUT_CB_REG(R300_ZB_BW_CNTL, 0); OUT_CB_REG(R300_ZB_DEPTHCLEARVALUE, 0); OUT_CB_REG(R300_SC_HYPERZ, R300_SC_HYPERZ_ADJ_2); + + if (r300->screen->caps.is_rv350) { + OUT_CB_REG(R300_GB_Z_PEQ_CONFIG, 0); + } END_CB; } } @@ -415,6 +439,10 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, rws->cs_set_flush(r300->cs, r300_flush_cb, r300); + /* setup hyper-z mm */ + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + r300_hyperz_init_mm(r300); + r300->upload_ib = u_upload_create(&r300->context, 32 * 1024, 16, PIPE_BIND_INDEX_BUFFER); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 7c77a46016..d86a5c8fc9 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -106,13 +106,19 @@ struct r300_dsa_state { }; struct r300_hyperz_state { + int current_func; /* -1 after a clear before first op */ + int flush; /* This is actually a command buffer with named dwords. */ + uint32_t cb_flush_begin; + uint32_t zb_zcache_ctlstat; /* R300_ZB_CACHE_CNTL */ uint32_t cb_begin; uint32_t zb_bw_cntl; /* R300_ZB_BW_CNTL */ uint32_t cb_reg1; uint32_t zb_depthclearvalue; /* R300_ZB_DEPTHCLEARVALUE */ uint32_t cb_reg2; uint32_t sc_hyperz; /* R300_SC_HYPERZ */ + uint32_t cb_reg3; + uint32_t gb_z_peq_config; /* R300_GB_Z_PEQ_CONFIG: 0x4028 */ }; struct r300_gpu_flush { @@ -321,6 +327,7 @@ struct r300_surface { /* Whether the CBZB clear is allowed on the surface. */ boolean cbzb_allowed; + }; struct r300_texture_desc { @@ -387,6 +394,10 @@ struct r300_texture { /* All bits should be filled in. */ struct r300_texture_fb_state fb_state; + /* hyper-z memory allocs */ + struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS]; + struct mem_block *zmask_mem[R300_MAX_TEXTURE_LEVELS]; + /* This is the level tiling flags were last time set for. * It's used to prevent redundant tiling-flags changes from happening.*/ unsigned surface_level; @@ -512,6 +523,10 @@ struct r300_context { struct r300_atom texture_cache_inval; /* GPU flush. */ struct r300_atom gpu_flush; + /* HiZ clear */ + struct r300_atom hiz_clear; + /* zmask clear */ + struct r300_atom zmask_clear; /* Invariant state. This must be emitted to get the engine started. */ struct r300_atom invariant_state; @@ -549,8 +564,19 @@ struct r300_context { boolean two_sided_color; /* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */ boolean incompatible_vb_layout; - + /* Whether fast zclear is enabled. */ + boolean z_fastfill; +#define R300_Z_COMPRESS_44 1 +#define RV350_Z_COMPRESS_88 2 + int z_compression; + boolean hiz_enable; boolean cbzb_clear; + boolean z_decomp_rd; + + /* two mem block managers for hiz/zmask ram space */ + struct mem_block *hiz_mm; + struct mem_block *zmask_mm; + /* upload managers */ struct u_upload_mgr *upload_vb; struct u_upload_mgr *upload_ib; @@ -621,7 +647,8 @@ void r300_plug_in_stencil_ref_fallback(struct r300_context *r300); /* r300_state.c */ enum r300_fb_state_change { R300_CHANGED_FB_STATE = 0, - R300_CHANGED_CBZB_FLAG + R300_CHANGED_CBZB_FLAG, + R300_CHANGED_ZCLEAR_FLAG }; void r300_mark_fb_state_dirty(struct r300_context *r300, diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 053a64ea6d..c3e157e99a 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -44,6 +44,7 @@ static const struct debug_named_value debug_options[] = { { "notiling", DBG_NO_TILING, "Disable tiling (for benchmarking)" }, { "noimmd", DBG_NO_IMMD, "Disable immediate mode (for benchmarking)" }, { "stats", DBG_STATS, "Gather statistics" }, + { "hyperz", DBG_HYPERZ, "HyperZ (for debugging)" }, /* must be last */ DEBUG_NAMED_VALUE_END diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 36a26a7871..17e180a79a 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -25,6 +25,7 @@ #include "util/u_format.h" #include "util/u_math.h" +#include "util/u_mm.h" #include "util/u_simple_list.h" #include "r300_context.h" @@ -329,6 +330,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state; struct r300_surface* surf; unsigned i; + boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); CS_LOCALS(r300); BEGIN_CS(size); @@ -364,6 +366,10 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); OUT_CS_RELOC(surf->buffer, surf->cbzb_pitch, 0, surf->domain); + + DBG(r300, DBG_CBZB, + "CBZB clearing cbuf %08x %08x\n", surf->cbzb_format, + surf->cbzb_pitch); } /* Set up a zbuffer. */ else if (fb->zsbuf) { @@ -377,15 +383,32 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain); - /* HiZ RAM. */ - if (r300->screen->caps.has_hiz) { - OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); - OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); + if (has_hyperz) { + uint32_t surf_pitch; + struct r300_texture *tex; + int level = surf->base.level; + tex = r300_texture(surf->base.texture); + + surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK; + /* HiZ RAM. */ + if (r300->screen->caps.hiz_ram) { + if (tex->hiz_mem[level]) { + OUT_CS_REG(R300_ZB_HIZ_OFFSET, tex->hiz_mem[level]->ofs); + OUT_CS_REG(R300_ZB_HIZ_PITCH, surf_pitch); + } else { + OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); + OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); + } + } + /* Z Mask RAM. (compressed zbuffer) */ + if (tex->zmask_mem[level]) { + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch); + } else { + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0); + } } - - /* Z Mask RAM. (compressed zbuffer) */ - OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); - OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0); } END_CS; @@ -394,8 +417,12 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) void r300_emit_hyperz_state(struct r300_context *r300, unsigned size, void *state) { + struct r300_hyperz_state *z = state; CS_LOCALS(r300); - WRITE_CS_TABLE(state, size); + if (z->flush) + WRITE_CS_TABLE(&z->cb_flush_begin, size); + else + WRITE_CS_TABLE(&z->cb_begin, size - 2); } void r300_emit_hyperz_end(struct r300_context *r300) @@ -403,9 +430,11 @@ void r300_emit_hyperz_end(struct r300_context *r300) struct r300_hyperz_state z = *(struct r300_hyperz_state*)r300->hyperz_state.state; + z.flush = 1; z.zb_bw_cntl = 0; z.zb_depthclearvalue = 0; z.sc_hyperz = R300_SC_HYPERZ_ADJ_2; + z.gb_z_peq_config = 0; r300_emit_hyperz_state(r300, r300->hyperz_state.size, &z); } @@ -943,6 +972,101 @@ void r300_emit_viewport_state(struct r300_context* r300, END_CS; } +static void r300_emit_hiz_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) +{ + CS_LOCALS(r300); + BEGIN_CS(4); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2); + OUT_CS(start); + OUT_CS(count); + OUT_CS(val); + END_CS; +} + +static void r300_emit_zmask_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) +{ + CS_LOCALS(r300); + BEGIN_CS(4); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2); + OUT_CS(start); + OUT_CS(count); + OUT_CS(val); + END_CS; +} + +#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) + +void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_hyperz_state *z = + (struct r300_hyperz_state*)r300->hyperz_state.state; + struct r300_screen* r300screen = r300->screen; + uint32_t stride, offset = 0, height, offset_shift; + struct r300_texture* tex; + int i; + + tex = r300_texture(fb->zsbuf->texture); + stride = tex->desc.stride_in_pixels[fb->zsbuf->level]; + + /* convert from pixels to 4x4 blocks */ + stride = ALIGN_DIVUP(stride, 4); + + stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); + /* there are 4 blocks per dwords */ + stride = ALIGN_DIVUP(stride, 4); + + height = ALIGN_DIVUP(fb->zsbuf->height, 4); + + offset_shift = 2; + offset_shift += (r300screen->caps.num_frag_pipes / 2); + + for (i = 0; i < height; i++) { + offset = i * stride; + offset <<= offset_shift; + r300_emit_hiz_line_clear(r300, offset, stride, 0xffffffff); + } + z->current_func = -1; +} + +void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_screen* r300screen = r300->screen; + uint32_t stride, offset = 0; + struct r300_texture* tex; + uint32_t i, height; + int mult, offset_shift; + + tex = r300_texture(fb->zsbuf->texture); + stride = tex->desc.stride_in_pixels[fb->zsbuf->level]; + + if (r300->z_compression == RV350_Z_COMPRESS_88) + mult = 8; + else + mult = 4; + + height = ALIGN_DIVUP(fb->zsbuf->height, mult); + + offset_shift = 4; + offset_shift += (r300screen->caps.num_frag_pipes / 2); + stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); + + /* okay have width in pixels - divide by block width */ + stride = ALIGN_DIVUP(stride, mult); + /* have width in blocks - divide by number of fragment pipes screen width */ + /* 16 blocks per dword */ + stride = ALIGN_DIVUP(stride, 16); + + for (i = 0; i < height; i++) { + offset = i * stride; + offset <<= offset_shift; + r300_emit_zmask_line_clear(r300, offset, stride, 0x0);//0xffffffff); + } +} + void r300_emit_ztop_state(struct r300_context* r300, unsigned size, void* state) { diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 5d05039669..2f2c2f2dcb 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -112,6 +112,9 @@ void r300_emit_texture_cache_inval(struct r300_context* r300, unsigned size, voi void r300_emit_invariant_state(struct r300_context *r300, unsigned size, void *state); +void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state); +void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state); + unsigned r300_get_num_dirty_dwords(struct r300_context *r300); /* Emit all dirty state. */ diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index fe182b6615..7fed9b5d07 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -44,7 +44,8 @@ static void r300_flush(struct pipe_context* pipe, u_upload_flush(r300->upload_ib); if (r300->dirty_hw) { - r300_emit_hyperz_end(r300); + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + r300_emit_hyperz_end(r300); r300_emit_query_end(r300); r300->flush_counter++; diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index e952895601..e719342a46 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -21,25 +21,158 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "util/u_format.h" +#include "util/u_mm.h" #include "r300_context.h" #include "r300_hyperz.h" #include "r300_reg.h" #include "r300_fs.h" +#include "r300_emit.h" +#include "r300_texture.h" +/* + HiZ rules - taken from various docs + 1. HiZ only works on depth values + 2. Cannot HiZ if stencil fail or zfail is !KEEP + 3. on R300/400, HiZ is disabled if depth test is EQUAL + 4. comparison changes without clears usually mean disabling HiZ +*/ /*****************************************************************************/ /* The HyperZ setup */ /*****************************************************************************/ +static bool r300_get_sc_hz_max(struct r300_context *r300) +{ + struct r300_dsa_state *dsa_state = r300->dsa_state.state; + int func = dsa_state->z_stencil_control & 0x7; + int ret = R300_SC_HYPERZ_MIN; + + if (func >= 4 && func <= 7) + ret = R300_SC_HYPERZ_MAX; + return ret; +} + +static bool r300_zfunc_same_direction(int func1, int func2) +{ + /* func1 is less/lessthan */ + if (func1 == 1 || func1 == 2) + if (func2 == 3 || func2 == 4 || func2 == 5) + return FALSE; + + if (func2 == 1 || func2 == 2) + if (func1 == 4 || func1 == 5) + return FALSE; + return TRUE; +} + +static int r300_get_hiz_min(struct r300_context *r300) +{ + struct r300_dsa_state *dsa_state = r300->dsa_state.state; + int func = dsa_state->z_stencil_control & 0x7; + int ret = R300_HIZ_MIN; + + if (func == 1 || func == 2) + ret = R300_HIZ_MAX; + return ret; +} + +static boolean r300_dsa_stencil_op_not_keep(struct pipe_stencil_state *s) +{ + if (s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP || + s->zfail_op != PIPE_STENCIL_OP_KEEP)) + return TRUE; + return FALSE; +} + +static boolean r300_can_hiz(struct r300_context *r300) +{ + struct r300_dsa_state *dsa_state = r300->dsa_state.state; + struct pipe_depth_stencil_alpha_state *dsa = &dsa_state->dsa; + struct r300_screen* r300screen = r300->screen; + struct r300_hyperz_state *z = r300->hyperz_state.state; + + /* shader writes depth - no HiZ */ + if (r300_fragment_shader_writes_depth(r300_fs(r300))) /* (5) */ + return FALSE; + + if (r300->query_current) + return FALSE; + /* if stencil fail/zfail op is not KEEP */ + if (r300_dsa_stencil_op_not_keep(&dsa->stencil[0]) || + r300_dsa_stencil_op_not_keep(&dsa->stencil[1])) + return FALSE; + + if (dsa->depth.enabled) { + /* if depth func is EQUAL pre-r500 */ + if (dsa->depth.func == PIPE_FUNC_EQUAL && !r300screen->caps.is_r500) + return FALSE; + /* if depth func is NOTEQUAL */ + if (dsa->depth.func == PIPE_FUNC_NOTEQUAL) + return FALSE; + } + /* depth comparison function - if just cleared save and return okay */ + if (z->current_func == -1) { + int func = dsa_state->z_stencil_control & 0x7; + if (func != 0 && func != 7) + z->current_func = dsa_state->z_stencil_control & 0x7; + } else { + /* simple don't change */ + if (!r300_zfunc_same_direction(z->current_func, (dsa_state->z_stencil_control & 0x7))) { + DBG(r300, DBG_HYPERZ, "z func changed direction - disabling hyper-z %d -> %d\n", z->current_func, dsa_state->z_stencil_control); + return FALSE; + } + } + return TRUE; +} + static void r300_update_hyperz(struct r300_context* r300) { struct r300_hyperz_state *z = (struct r300_hyperz_state*)r300->hyperz_state.state; + z->gb_z_peq_config = 0; z->zb_bw_cntl = 0; z->sc_hyperz = R300_SC_HYPERZ_ADJ_2; + z->flush = 0; - if (r300->cbzb_clear) + if (r300->cbzb_clear) { z->zb_bw_cntl |= R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY; + return; + } + + /* Zbuffer compression. */ + if (r300->z_compression) { + z->zb_bw_cntl |= R300_RD_COMP_ENABLE; + if (r300->z_decomp_rd == false) + z->zb_bw_cntl |= R300_WR_COMP_ENABLE; + /* RV350 and up optimizations. */ + if (r300->z_compression == RV350_Z_COMPRESS_88) + z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; + } + + /* Z fastfill. */ + if (r300->z_fastfill) { + z->zb_bw_cntl |= R300_FAST_FILL_ENABLE; /* | R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE;*/ + } + + if (r300->hiz_enable) { + bool can_hiz = r300_can_hiz(r300); + if (can_hiz) { + z->zb_bw_cntl |= R300_HIZ_ENABLE; + z->sc_hyperz |= R300_SC_HYPERZ_ENABLE; + z->sc_hyperz |= r300_get_sc_hz_max(r300); + z->zb_bw_cntl |= r300_get_hiz_min(r300); + } + } + + if (r300->screen->caps.is_r500) { + /* XXX Are these bits really available on RV350? */ + z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3; + z->zb_bw_cntl |= + R500_HIZ_EQUAL_REJECT_ENABLE | + R500_PEQ_PACKING_ENABLE | + R500_COVERED_PTR_MASKING_ENABLE; + } } /*****************************************************************************/ @@ -126,15 +259,115 @@ static void r300_update_ztop(struct r300_context* r300) } else { ztop_state->z_buffer_top = R300_ZTOP_ENABLE; } - if (ztop_state->z_buffer_top != old_ztop) r300->ztop_state.dirty = TRUE; } +#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) + +static void r300_update_hiz_clear(struct r300_context *r300) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + uint32_t height; + + height = ALIGN_DIVUP(fb->zsbuf->height, 4); + r300->hiz_clear.size = height * 4; +} + +static void r300_update_zmask_clear(struct r300_context *r300) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + uint32_t height; + int mult; + + if (r300->z_compression == RV350_Z_COMPRESS_88) + mult = 8; + else + mult = 4; + + height = ALIGN_DIVUP(fb->zsbuf->height, mult); + + r300->zmask_clear.size = height * 4; +} + void r300_update_hyperz_state(struct r300_context* r300) { r300_update_ztop(r300); if (r300->hyperz_state.dirty) { r300_update_hyperz(r300); } + + if (r300->hiz_clear.dirty) { + r300_update_hiz_clear(r300); + } + if (r300->zmask_clear.dirty) { + r300_update_zmask_clear(r300); + } +} + +void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf) +{ + struct r300_texture *tex; + uint32_t zsize, ndw; + int level = surf->base.level; + + tex = r300_texture(surf->base.texture); + + if (tex->hiz_mem[level]) + return; + + zsize = tex->desc.layer_size_in_bytes[level]; + zsize /= util_format_get_blocksize(tex->desc.b.b.format); + ndw = ALIGN_DIVUP(zsize, 64); + + tex->hiz_mem[level] = u_mmAllocMem(r300->hiz_mm, ndw, 0, 0); + return; +} + +void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress) +{ + int bsize = 256; + uint32_t zsize, ndw; + int level = surf->base.level; + struct r300_texture *tex; + + tex = r300_texture(surf->base.texture); + + if (tex->zmask_mem[level]) + return; + + zsize = tex->desc.layer_size_in_bytes[level]; + zsize /= util_format_get_blocksize(tex->desc.b.b.format); + + /* each zmask dword represents 16 4x4 blocks - which is 256 pixels + or 16 8x8 depending on the gb peq flag = 1024 pixels */ + if (compress == RV350_Z_COMPRESS_88) + bsize = 1024; + + ndw = ALIGN_DIVUP(zsize, bsize); + tex->zmask_mem[level] = u_mmAllocMem(r300->zmask_mm, ndw, 0, 0); + return; +} + +void r300_hyperz_init_mm(struct r300_context *r300) +{ + struct r300_screen* r300screen = r300->screen; + int frag_pipes = r300screen->caps.num_frag_pipes; + + if (r300screen->caps.hiz_ram) + r300->hiz_mm = u_mmInit(0, r300screen->caps.hiz_ram * frag_pipes); + + r300->zmask_mm = u_mmInit(0, r300screen->caps.zmask_ram * frag_pipes); +} + +void r300_hyperz_destroy_mm(struct r300_context *r300) +{ + struct r300_screen* r300screen = r300->screen; + + if (r300screen->caps.hiz_ram) + u_mmDestroy(r300->hiz_mm); + + u_mmDestroy(r300->zmask_mm); } diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h index 3df5053b89..09e1ff6625 100644 --- a/src/gallium/drivers/r300/r300_hyperz.h +++ b/src/gallium/drivers/r300/r300_hyperz.h @@ -27,4 +27,9 @@ struct r300_context; void r300_update_hyperz_state(struct r300_context* r300); +void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf); +void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress); + +void r300_hyperz_init_mm(struct r300_context *r300); +void r300_hyperz_destroy_mm(struct r300_context *r300); #endif diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 2acc1a903e..99a9d65055 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -3436,6 +3436,7 @@ enum { # define R300_VBPNTR_SIZE1(x) (((x) >> 2) << 16) # define R300_VBPNTR_STRIDE1(x) (((x) >> 2) << 24) +#define R300_PACKET3_3D_CLEAR_ZMASK 0x00003200 #define R300_PACKET3_INDX_BUFFER 0x00003300 # define R300_INDX_BUFFER_DST_SHIFT 0 # define R300_INDX_BUFFER_SKIP_SHIFT 16 diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 7c4294bc9f..910f5f7113 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -223,7 +223,8 @@ static void r300_prepare_for_rendering(struct r300_context *r300, /* Emitted in flush. */ end_dwords += 26; /* emit_query_end */ - end_dwords += r300->hyperz_state.size; /* emit_hyperz_end */ + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + end_dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */ cs_dwords += end_dwords; diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 18745b83a0..13a3320b99 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -91,6 +91,7 @@ r300_winsys_screen(struct pipe_screen *screen) { #define DBG_FB (1 << 9) #define DBG_RS_BLOCK (1 << 10) #define DBG_CBZB (1 << 11) +#define DBG_HYPERZ (1 << 12) /* Features. */ #define DBG_ANISOHQ (1 << 16) #define DBG_NO_TILING (1 << 17) diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index a3383c3878..374aa254f0 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -25,6 +25,7 @@ #include "util/u_blitter.h" #include "util/u_math.h" +#include "util/u_mm.h" #include "util/u_memory.h" #include "util/u_pack_color.h" @@ -43,6 +44,7 @@ #include "r300_texture.h" #include "r300_vs.h" #include "r300_winsys.h" +#include "r300_hyperz.h" /* r300_state: Functions used to intialize state context by translating * Gallium state objects into semi-native r300 state objects. */ @@ -472,14 +474,14 @@ static void* dsa->dsa = *state; - /* Depth test setup. */ + /* Depth test setup. - separate write mask depth for decomp flush */ + if (state->depth.writemask) { + dsa->z_buffer_control |= R300_Z_WRITE_ENABLE; + } + if (state->depth.enabled) { dsa->z_buffer_control |= R300_Z_ENABLE; - if (state->depth.writemask) { - dsa->z_buffer_control |= R300_Z_WRITE_ENABLE; - } - dsa->z_stencil_control |= (r300_translate_depth_stencil_function(state->depth.func) << R300_Z_FUNC_SHIFT); @@ -592,6 +594,7 @@ static void r300_bind_dsa_state(struct pipe_context* pipe, UPDATE_STATE(state, r300->dsa_state); + r300->hyperz_state.dirty = TRUE; /* Will be updated before the emission. */ r300_dsa_inject_stencilref(r300); } @@ -685,7 +688,8 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, /* What is marked as dirty depends on the enum r300_fb_state_change. */ r300->gpu_flush.dirty = TRUE; r300->fb_state.dirty = TRUE; - r300->hyperz_state.dirty = TRUE; + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + r300->hyperz_state.dirty = TRUE; if (change == R300_CHANGED_FB_STATE) { r300->aa_state.dirty = TRUE; @@ -698,7 +702,7 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, if (r300->cbzb_clear) r300->fb_state.size += 10; else if (state->zsbuf) - r300->fb_state.size += r300->screen->caps.has_hiz ? 18 : 14; + r300->fb_state.size += r300->screen->caps.hiz_ram ? 18 : 14; /* The size of the rest of atoms stays the same. */ } @@ -710,8 +714,10 @@ static void struct r300_context* r300 = r300_context(pipe); struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; struct pipe_framebuffer_state *old_state = r300->fb_state.state; + boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); unsigned max_width, max_height, i; uint32_t zbuffer_bpp = 0; + int blocksize; if (r300->screen->caps.is_r500) { max_width = max_height = 4096; @@ -743,17 +749,52 @@ static void r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE); - /* Polygon offset depends on the zbuffer bit depth. */ + r300->hiz_enable = false; + r300->z_fastfill = false; + r300->z_compression = false; + if (state->zsbuf) { - switch (util_format_get_blocksize(state->zsbuf->texture->format)) { - case 2: - zbuffer_bpp = 16; - break; - case 4: - zbuffer_bpp = 24; - break; + blocksize = util_format_get_blocksize(state->zsbuf->texture->format); + switch (blocksize) { + case 2: + zbuffer_bpp = 16; + break; + case 4: + zbuffer_bpp = 24; + break; } + if (has_hyperz) { + struct r300_surface *zs_surf = r300_surface(state->zsbuf); + struct r300_texture *tex; + int compress = r300->screen->caps.is_rv350 ? RV350_Z_COMPRESS_88 : R300_Z_COMPRESS_44; + int level = zs_surf->base.level; + + tex = r300_texture(zs_surf->base.texture); + + /* work out whether we can support hiz on this buffer */ + r300_hiz_alloc_block(r300, zs_surf); + + /* work out whether we can support zmask features on this buffer */ + r300_zmask_alloc_block(r300, zs_surf, compress); + + if (tex->hiz_mem[level]) { + r300->hiz_enable = 1; + } + if (tex->zmask_mem[level]) { + r300->z_fastfill = 1; + /* compression causes hangs on 16-bit */ + if (zbuffer_bpp == 24) + r300->z_compression = compress; + } + DBG(r300, DBG_HYPERZ, + "hyper-z features: hiz: %d @ %08x z-compression: %d z-fastfill: %d @ %08x\n", r300->hiz_enable, + tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef, + r300->z_compression, r300->z_fastfill, + tex->zmask_mem[level] ? tex->zmask_mem[level]->ofs : 0xdeadbeef); + } + + /* Polygon offset depends on the zbuffer bit depth. */ if (r300->zbuffer_bpp != zbuffer_bpp) { r300->zbuffer_bpp = zbuffer_bpp; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 66f64f0f6a..f3dad4c292 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -35,6 +35,7 @@ #include "r300_state_inlines.h" #include "r300_texture.h" #include "r300_vs.h" +#include "r300_winsys.h" /* r300_state_derived: Various bits of state which are dependent upon * currently bound CSO data. */ @@ -693,5 +694,6 @@ void r300_update_derived_state(struct r300_context* r300) } } - r300_update_hyperz_state(r300); + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + r300_update_hyperz_state(r300); } diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index fcdca5605e..da8eadd3b5 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -35,6 +35,7 @@ #include "util/u_format_s3tc.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_mm.h" #include "pipe/p_screen.h" @@ -645,8 +646,16 @@ static void r300_texture_destroy(struct pipe_screen *screen, { struct r300_texture* tex = (struct r300_texture*)texture; struct r300_winsys_screen *rws = (struct r300_winsys_screen *)texture->screen->winsys; + int i; rws->buffer_reference(rws, &tex->buffer, NULL); + for (i = 0; i < R300_MAX_TEXTURE_LEVELS; i++) { + if (tex->hiz_mem[i]) + u_mmFreeMem(tex->hiz_mem[i]); + if (tex->zmask_mem[i]) + u_mmFreeMem(tex->zmask_mem[i]); + } + FREE(tex); } diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index ff11546a64..e7a1ede4fb 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -49,6 +49,7 @@ enum r300_value_id { R300_VID_Z_PIPES, R300_VID_SQUARE_TILING_SUPPORT, R300_VID_DRM_2_3_0, + R300_CAN_HYPERZ, }; enum r300_reference_domain { /* bitfield */ diff --git a/src/gallium/winsys/radeon/drm/radeon_drm.c b/src/gallium/winsys/radeon/drm/radeon_drm.c index e9a276362f..e7057ca593 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm.c @@ -130,6 +130,16 @@ static void do_ioctls(int fd, struct radeon_libdrm_winsys* winsys) } winsys->z_pipes = target; + winsys->hyperz = FALSE; +#ifndef RADEON_INFO_WANT_HYPERZ +#define RADEON_INFO_WANT_HYPERZ 7 +#endif + info.request = RADEON_INFO_WANT_HYPERZ; + retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); + if (!retval && target == 1) { + winsys->hyperz = TRUE; + } + retval = drmCommandWriteRead(fd, DRM_RADEON_GEM_INFO, &gem_info, sizeof(gem_info)); if (retval) { diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c index 5544504067..955ae4c045 100644 --- a/src/gallium/winsys/radeon/drm/radeon_r300.c +++ b/src/gallium/winsys/radeon/drm/radeon_r300.c @@ -211,6 +211,8 @@ static uint32_t radeon_get_value(struct r300_winsys_screen *rws, return ws->squaretiling; case R300_VID_DRM_2_3_0: return ws->drm_2_3_0; + case R300_CAN_HYPERZ: + return ws->hyperz; } return 0; } diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 533b7b2e2d..52db0d62d2 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -65,6 +65,9 @@ struct radeon_libdrm_winsys { */ boolean drm_2_3_0; + /* hyperz user */ + boolean hyperz; + /* DRM FD */ int fd; -- cgit v1.2.3 From 41f9e60bb30765e9d272bb6d61bb8fddc3623f8b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 5 Aug 2010 20:30:31 +1000 Subject: r300g: disable hiz on rv530 for now. On my rv530 at least HiZ is causing rendering issues in gears. --- src/gallium/drivers/r300/r300_chipset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 2df25f9c8e..48c2409211 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -366,7 +366,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RV530; caps->num_vert_fpus = 5; caps->is_r500 = TRUE; - caps->hiz_ram = RV530_HIZ_LIMIT; + /*caps->hiz_ram = RV530_HIZ_LIMIT;*/ caps->zmask_ram = PIPE_ZMASK_SIZE; break; -- cgit v1.2.3 From b5f104471175830de6474c176ebc567714edcdd6 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 5 Aug 2010 20:58:51 +1000 Subject: r300g: always emit hyperz state atom. --- src/gallium/drivers/r300/r300_context.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 0668fbc151..4658ab0ff5 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -195,8 +195,7 @@ static void r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(gpu_flush, 9); R300_INIT_ATOM(aa_state, 4); R300_INIT_ATOM(fb_state, 0); - if (has_hyperz) - R300_INIT_ATOM(hyperz_state, is_rv350 ? 10 : 8); + R300_INIT_ATOM(hyperz_state, is_rv350 ? 10 : 8); /* ZB (unpipelined), SC. */ R300_INIT_ATOM(ztop_state, 2); /* ZB, FG. */ @@ -251,8 +250,7 @@ static void r300_setup_atoms(struct r300_context* r300) r300->clip_state.state = CALLOC_STRUCT(r300_clip_state); r300->fb_state.state = CALLOC_STRUCT(pipe_framebuffer_state); r300->gpu_flush.state = CALLOC_STRUCT(pipe_framebuffer_state); - if (has_hyperz) - r300->hyperz_state.state = CALLOC_STRUCT(r300_hyperz_state); + r300->hyperz_state.state = CALLOC_STRUCT(r300_hyperz_state); r300->invariant_state.state = CALLOC_STRUCT(r300_invariant_state); r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block); r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); @@ -365,7 +363,6 @@ static void r300_init_states(struct pipe_context *pipe) } /* Initialize the hyperz state. */ - if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) { struct r300_hyperz_state *hyperz = (struct r300_hyperz_state*)r300->hyperz_state.state; -- cgit v1.2.3 From a4be3b6b312496305de473b5af219a3afd84f6c6 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 5 Aug 2010 19:04:01 +0200 Subject: r300g: fix fb_state atom size --- src/gallium/drivers/r300/r300_state.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 374aa254f0..1e6b81d798 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -684,6 +684,7 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, enum r300_fb_state_change change) { struct pipe_framebuffer_state *state = r300->fb_state.state; + boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); /* What is marked as dirty depends on the enum r300_fb_state_change. */ r300->gpu_flush.dirty = TRUE; @@ -701,8 +702,11 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, if (r300->cbzb_clear) r300->fb_state.size += 10; - else if (state->zsbuf) - r300->fb_state.size += r300->screen->caps.hiz_ram ? 18 : 14; + else if (state->zsbuf) { + r300->fb_state.size += 10; + if (has_hyperz) + r300->fb_state.size += r300->screen->caps.hiz_ram ? 8 : 4; + } /* The size of the rest of atoms stays the same. */ } -- cgit v1.2.3 From 9c949d4a4dd43b7889e13bdf683bcf211f049ced Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 4 Aug 2010 17:37:59 -0400 Subject: r600g: don't use dynamic state allocation for states Simplify state handly by avoiding state allocation. Next step is to allocate once for all context packet buffer and then avoid rebuilding pm4 packet each time (through use of combined crc) this would also avoid number of memcpy. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_context.c | 141 +++++++-------- src/gallium/drivers/r600/r600_context.h | 29 +-- src/gallium/drivers/r600/r600_draw.c | 115 ++++++------ src/gallium/drivers/r600/r600_shader.c | 34 ++-- src/gallium/drivers/r600/r600_state.c | 281 ++++++++++++----------------- src/gallium/drivers/r600/radeon.h | 52 ++++-- src/gallium/targets/dri-r600/Makefile | 4 +- src/gallium/winsys/r600/drm/radeon.c | 11 -- src/gallium/winsys/r600/drm/radeon_ctx.c | 160 +++++----------- src/gallium/winsys/r600/drm/radeon_draw.c | 92 +--------- src/gallium/winsys/r600/drm/radeon_priv.h | 29 --- src/gallium/winsys/r600/drm/radeon_state.c | 70 +------ 12 files changed, 361 insertions(+), 657 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index ae1780a1d4..f7732d8952 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -48,18 +48,14 @@ void r600_flush(struct pipe_context *ctx, unsigned flags, struct r600_screen *rscreen = rctx->screen; static int dc = 0; - if (radeon_ctx_pm4(rctx->ctx)) + if (radeon_ctx_pm4(&rctx->ctx)) return; /* FIXME dumping should be removed once shader support instructions * without throwing bad code */ if (!dc) - radeon_ctx_dump_bof(rctx->ctx, "gallium.bof"); -#if 1 - radeon_ctx_submit(rctx->ctx); -#endif - rctx->ctx = radeon_ctx_decref(rctx->ctx); - rctx->ctx = radeon_ctx(rscreen->rw); + radeon_ctx_dump_bof(&rctx->ctx, "gallium.bof"); + radeon_ctx_submit(&rctx->ctx); dc++; } @@ -220,9 +216,8 @@ static void r600_init_config(struct r600_context *rctx) printf("num_gs_stack_entries : %d\n", num_gs_stack_entries); printf("num_es_stack_entries : %d\n", num_es_stack_entries); - rctx->hw_states.config = radeon_state(rctx->rw, R600_CONFIG_TYPE, R600_CONFIG); - - rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] = 0x00000000; + radeon_state_init(&rctx->config, rctx->rw, R600_CONFIG_TYPE, R600_CONFIG); + rctx->config.states[R600_CONFIG__SQ_CONFIG] = 0x00000000; switch (family) { case CHIP_RV610: case CHIP_RV620: @@ -231,75 +226,75 @@ static void r600_init_config(struct r600_context *rctx) case CHIP_RV710: break; default: - rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1); + rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1); break; } - rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_DX9_CONSTS(1); - rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ALU_INST_PREFER_VECTOR(1); - rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio); - rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio); - rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio); - rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio); + rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_DX9_CONSTS(1); + rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ALU_INST_PREFER_VECTOR(1); + rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio); + rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio); + rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio); + rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio); - rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0; - rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs); - rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs); - rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); + rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0; + rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs); + rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs); + rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0; - rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs); - rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_es_gprs); + rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0; + rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs); + rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_es_gprs); - rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0; - rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_PS_THREADS(num_ps_threads); - rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_VS_THREADS(num_vs_threads); - rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_GS_THREADS(num_gs_threads); - rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_ES_THREADS(num_es_threads); + rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0; + rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_PS_THREADS(num_ps_threads); + rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_VS_THREADS(num_vs_threads); + rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_GS_THREADS(num_gs_threads); + rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_ES_THREADS(num_es_threads); - rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0; - rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); - rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); + rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0; + rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); + rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); - rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0; - rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); - rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); + rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0; + rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); + rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); - rctx->hw_states.config->states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000; - rctx->hw_states.config->states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002; - rctx->hw_states.config->states[R600_CONFIG__VC_ENHANCE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__DB_DEBUG] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__DB_WATERMARKS] = 0x00420204; - rctx->hw_states.config->states[R600_CONFIG__SX_MISC] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001; - rctx->hw_states.config->states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003; - rctx->hw_states.config->states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_GS_MODE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000; - rctx->hw_states.config->states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001; - rctx->hw_states.config->states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000; - radeon_state_pm4(rctx->hw_states.config); + rctx->config.states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000; + rctx->config.states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002; + rctx->config.states[R600_CONFIG__VC_ENHANCE] = 0x00000000; + rctx->config.states[R600_CONFIG__DB_DEBUG] = 0x00000000; + rctx->config.states[R600_CONFIG__DB_WATERMARKS] = 0x00420204; + rctx->config.states[R600_CONFIG__SX_MISC] = 0x00000000; + rctx->config.states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001; + rctx->config.states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003; + rctx->config.states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_GS_MODE] = 0x00000000; + rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000; + rctx->config.states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001; + rctx->config.states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000; + radeon_state_pm4(&rctx->config); } struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) @@ -333,7 +328,7 @@ struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) r600_init_config(rctx); - rctx->ctx = radeon_ctx(rscreen->rw); - rctx->draw = radeon_draw(rscreen->rw); + radeon_ctx_init(&rctx->ctx, rscreen->rw); + radeon_draw_init(&rctx->draw, rscreen->rw); return &rctx->context; } diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index 8d102b6850..78da88fef5 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -76,7 +76,7 @@ struct r600_context_state { union pipe_states state; unsigned refcount; unsigned type; - struct radeon_state *rstate; + struct radeon_state rstate; struct r600_shader shader; struct radeon_bo *bo; }; @@ -89,28 +89,28 @@ struct r600_vertex_element }; struct r600_context_hw_states { - struct radeon_state *rasterizer; - struct radeon_state *scissor; - struct radeon_state *dsa; - struct radeon_state *blend; - struct radeon_state *viewport; - struct radeon_state *cb0; - struct radeon_state *config; - struct radeon_state *cb_cntl; - struct radeon_state *db; + struct radeon_state rasterizer; + struct radeon_state scissor; + struct radeon_state dsa; + struct radeon_state blend; + struct radeon_state viewport; + struct radeon_state cb0; + struct radeon_state config; + struct radeon_state cb_cntl; + struct radeon_state db; unsigned ps_nresource; unsigned ps_nsampler; - struct radeon_state *ps_resource[160]; - struct radeon_state *ps_sampler[16]; + struct radeon_state ps_resource[160]; + struct radeon_state ps_sampler[16]; }; struct r600_context { struct pipe_context context; struct r600_screen *screen; struct radeon *rw; - struct radeon_ctx *ctx; + struct radeon_ctx ctx; struct blitter_context *blitter; - struct radeon_draw *draw; + struct radeon_draw draw; /* hw states */ struct r600_context_hw_states hw_states; /* pipe states */ @@ -120,6 +120,7 @@ struct r600_context { unsigned ps_nsampler_view; unsigned vs_nsampler_view; unsigned nvertex_buffer; + struct radeon_state config; struct r600_context_state *rasterizer; struct r600_context_state *poly_stipple; struct r600_context_state *scissor; diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c index 43c805b982..7130bf2fa8 100644 --- a/src/gallium/drivers/r600/r600_draw.c +++ b/src/gallium/drivers/r600/r600_draw.c @@ -38,8 +38,8 @@ struct r600_draw { struct pipe_context *ctx; - struct radeon_state *draw; - struct radeon_state *vgt; + struct radeon_state draw; + struct radeon_state vgt; unsigned mode; unsigned start; unsigned count; @@ -51,7 +51,7 @@ static int r600_draw_common(struct r600_draw *draw) { struct r600_context *rctx = r600_context(draw->ctx); struct r600_screen *rscreen = rctx->screen; - struct radeon_state *vs_resource; + struct radeon_state vs_resource; struct r600_resource *rbuffer; unsigned i, j, offset, format, prim; u32 vgt_dma_index_type, vgt_draw_initiator; @@ -88,10 +88,10 @@ static int r600_draw_common(struct r600_draw *draw) r = r600_pipe_shader_update(draw->ctx, rctx->ps_shader); if (r) return r; - r = radeon_draw_set(rctx->draw, rctx->vs_shader->rstate); + r = radeon_draw_set(&rctx->draw, &rctx->vs_shader->rstate); if (r) return r; - r = radeon_draw_set(rctx->draw, rctx->ps_shader->rstate); + r = radeon_draw_set(&rctx->draw, &rctx->ps_shader->rstate); if (r) return r; @@ -103,81 +103,68 @@ static int r600_draw_common(struct r600_draw *draw) r = r600_conv_pipe_format(rctx->vertex_elements->elements[i].src_format, &format); if (r) return r; - vs_resource = radeon_state(rscreen->rw, R600_VS_RESOURCE_TYPE, R600_VS_RESOURCE + i); - if (vs_resource == NULL) - return -ENOMEM; - vs_resource->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); - vs_resource->nbo = 1; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = offset; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = rbuffer->bo->size - offset; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = S_038008_STRIDE(vertex_buffer->stride) | - S_038008_DATA_FORMAT(format); - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = 0x00000000; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD4] = 0x00000000; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD5] = 0x00000000; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = 0xC0000000; - vs_resource->placement[0] = RADEON_GEM_DOMAIN_GTT; - vs_resource->placement[1] = RADEON_GEM_DOMAIN_GTT; - r = radeon_draw_set_new(rctx->draw, vs_resource); - if (r) - return r; - } -#if 0 - /* setup texture sampler & resource */ - for (i = 0 ; i < rctx->ps_nsampler; i++) { - r = radeon_draw_set_new(rctx->draw, rctx->ps_sampler[i]->rstate); + r = radeon_state_init(&vs_resource, rscreen->rw, R600_VS_RESOURCE_TYPE, R600_VS_RESOURCE + i); if (r) return r; - } - for (i = 0 ; i < rctx->ps_nsampler_view; i++) { - r = radeon_draw_set_new(rctx->draw, rctx->ps_sampler_view[i]->rstate); + vs_resource.bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + vs_resource.nbo = 1; + vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD0] = offset; + vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD1] = rbuffer->bo->size - offset; + vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD2] = S_038008_STRIDE(vertex_buffer->stride) | + S_038008_DATA_FORMAT(format); + vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD3] = 0x00000000; + vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD4] = 0x00000000; + vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD5] = 0x00000000; + vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD6] = 0xC0000000; + vs_resource.placement[0] = RADEON_GEM_DOMAIN_GTT; + vs_resource.placement[1] = RADEON_GEM_DOMAIN_GTT; + radeon_state_pm4(&vs_resource); + r = radeon_draw_set(&rctx->draw, &vs_resource); if (r) return r; } -#endif /* FIXME start need to change winsys */ - draw->draw = radeon_state(rscreen->rw, R600_DRAW_TYPE, R600_DRAW); - if (draw->draw == NULL) - return -ENOMEM; - draw->draw->states[R600_DRAW__VGT_NUM_INDICES] = draw->count; - draw->draw->states[R600_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator; + r = radeon_state_init(&draw->draw, rscreen->rw, R600_DRAW_TYPE, R600_DRAW); + if (r) + return r; + draw->draw.states[R600_DRAW__VGT_NUM_INDICES] = draw->count; + draw->draw.states[R600_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator; if (draw->index_buffer) { - rbuffer = (struct r600_buffer*)draw->index_buffer; - draw->draw->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); - draw->draw->placement[0] = RADEON_GEM_DOMAIN_GTT; - draw->draw->placement[1] = RADEON_GEM_DOMAIN_GTT; - draw->draw->nbo = 1; + rbuffer = (struct r600_resource*)draw->index_buffer; + draw->draw.bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + draw->draw.placement[0] = RADEON_GEM_DOMAIN_GTT; + draw->draw.placement[1] = RADEON_GEM_DOMAIN_GTT; + draw->draw.nbo = 1; } - r = radeon_draw_set_new(rctx->draw, draw->draw); + radeon_state_pm4(&draw->draw); + r = radeon_draw_set(&rctx->draw, &draw->draw); + if (r) + return r; + r = radeon_state_init(&draw->vgt, rscreen->rw, R600_VGT_TYPE, R600_VGT); if (r) return r; - draw->vgt = radeon_state(rscreen->rw, R600_VGT_TYPE, R600_VGT); - if (draw->vgt == NULL) - return -ENOMEM; - draw->vgt->states[R600_VGT__VGT_PRIMITIVE_TYPE] = prim; - draw->vgt->states[R600_VGT__VGT_MAX_VTX_INDX] = 0x00FFFFFF; - draw->vgt->states[R600_VGT__VGT_MIN_VTX_INDX] = 0x00000000; - draw->vgt->states[R600_VGT__VGT_INDX_OFFSET] = draw->start; - draw->vgt->states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX] = 0x00000000; - draw->vgt->states[R600_VGT__VGT_DMA_INDEX_TYPE] = vgt_dma_index_type; - draw->vgt->states[R600_VGT__VGT_PRIMITIVEID_EN] = 0x00000000; - draw->vgt->states[R600_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001; - draw->vgt->states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_EN] = 0x00000000; - draw->vgt->states[R600_VGT__VGT_INSTANCE_STEP_RATE_0] = 0x00000000; - draw->vgt->states[R600_VGT__VGT_INSTANCE_STEP_RATE_1] = 0x00000000; - r = radeon_draw_set_new(rctx->draw, draw->vgt); + draw->vgt.states[R600_VGT__VGT_PRIMITIVE_TYPE] = prim; + draw->vgt.states[R600_VGT__VGT_MAX_VTX_INDX] = 0x00FFFFFF; + draw->vgt.states[R600_VGT__VGT_MIN_VTX_INDX] = 0x00000000; + draw->vgt.states[R600_VGT__VGT_INDX_OFFSET] = draw->start; + draw->vgt.states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX] = 0x00000000; + draw->vgt.states[R600_VGT__VGT_DMA_INDEX_TYPE] = vgt_dma_index_type; + draw->vgt.states[R600_VGT__VGT_PRIMITIVEID_EN] = 0x00000000; + draw->vgt.states[R600_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001; + draw->vgt.states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_EN] = 0x00000000; + draw->vgt.states[R600_VGT__VGT_INSTANCE_STEP_RATE_0] = 0x00000000; + draw->vgt.states[R600_VGT__VGT_INSTANCE_STEP_RATE_1] = 0x00000000; + radeon_state_pm4(&draw->vgt); + r = radeon_draw_set(&rctx->draw, &draw->vgt); if (r) return r; /* FIXME */ - r = radeon_ctx_set_draw_new(rctx->ctx, rctx->draw); + r = radeon_ctx_set_draw(&rctx->ctx, &rctx->draw); if (r == -EBUSY) { r600_flush(draw->ctx, 0, NULL); - r = radeon_ctx_set_draw_new(rctx->ctx, rctx->draw); + r = radeon_ctx_set_draw(&rctx->ctx, &rctx->draw); } - if (r) - return r; - rctx->draw = radeon_draw_duplicate(rctx->draw); - return 0; + return r; } void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 8da102cde0..f38aa7b463 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -130,11 +130,12 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta struct r600_shader *rshader = &rpshader->shader; struct radeon_state *state; unsigned i, tmp; + int r; - rpshader->rstate = radeon_state_decref(rpshader->rstate); - state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER); - if (state == NULL) - return -ENOMEM; + r = radeon_state_init(&rpshader->rstate, rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER); + if (r) + return r; + state = &rpshader->rstate; for (i = 0; i < 10; i++) { state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0; } @@ -145,11 +146,10 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta } state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr); - rpshader->rstate = state; - rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); - rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo); - rpshader->rstate->nbo = 2; - rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; + rpshader->rstate.bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); + rpshader->rstate.bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo); + rpshader->rstate.nbo = 2; + rpshader->rstate.placement[0] = RADEON_GEM_DOMAIN_GTT; return radeon_state_pm4(state); } @@ -159,11 +159,12 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta struct r600_shader *rshader = &rpshader->shader; struct radeon_state *state; unsigned i, tmp; + int r; - rpshader->rstate = radeon_state_decref(rpshader->rstate); - state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER); - if (state == NULL) - return -ENOMEM; + r = radeon_state_init(&rpshader->rstate, rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER); + if (r) + return r; + state = &rpshader->rstate; for (i = 0; i < rshader->ninput; i++) { tmp = S_028644_SEMANTIC(i); tmp |= S_028644_SEL_CENTROID(1); @@ -178,10 +179,9 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr); state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = 0x00000002; - rpshader->rstate = state; - rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); - rpshader->rstate->nbo = 1; - rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; + rpshader->rstate.bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); + rpshader->rstate.nbo = 1; + rpshader->rstate.placement[0] = RADEON_GEM_DOMAIN_GTT; return radeon_state_pm4(state); } diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index e8a591f73e..926a19cc6f 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -277,9 +277,10 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_context *rctx = r600_context(ctx); unsigned nconstant = 0, i, type, id; - struct radeon_state *rstate; + struct radeon_state rstate; struct pipe_transfer *transfer; u32 *ptr; + int r; switch (shader) { case PIPE_SHADER_VERTEX: @@ -300,16 +301,16 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, if (ptr == NULL) return; for (i = 0; i < nconstant; i++) { - rstate = radeon_state(rscreen->rw, type, id + i); - if (rstate == NULL) + r = radeon_state_init(&rstate, rscreen->rw, type, id + i); + if (r) return; - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT0_0] = ptr[i * 4 + 0]; - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT1_0] = ptr[i * 4 + 1]; - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT2_0] = ptr[i * 4 + 2]; - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT3_0] = ptr[i * 4 + 3]; - if (radeon_state_pm4(rstate)) + rstate.states[R600_PS_CONSTANT__SQ_ALU_CONSTANT0_0] = ptr[i * 4 + 0]; + rstate.states[R600_PS_CONSTANT__SQ_ALU_CONSTANT1_0] = ptr[i * 4 + 1]; + rstate.states[R600_PS_CONSTANT__SQ_ALU_CONSTANT2_0] = ptr[i * 4 + 2]; + rstate.states[R600_PS_CONSTANT__SQ_ALU_CONSTANT3_0] = ptr[i * 4 + 3]; + if (radeon_state_pm4(&rstate)) return; - if (radeon_draw_set_new(rctx->draw, rstate)) + if (radeon_draw_set(&rctx->draw, &rstate)) return; } pipe_buffer_unmap(ctx, buffer, transfer); @@ -520,7 +521,6 @@ struct r600_context_state *r600_context_state_decref(struct r600_context_state * R600_ERR("invalid type %d\n", rstate->type); return NULL; } - radeon_state_decref(rstate->rstate); FREE(rstate); return NULL; } @@ -603,16 +603,17 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne return rstate; } -static struct radeon_state *r600_blend(struct r600_context *rctx) +static int r600_blend(struct r600_context *rctx, struct radeon_state *rstate) { struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate; const struct pipe_blend_state *state = &rctx->blend->state.blend; int i; + int r; + + r = radeon_state_init(rstate, rscreen->rw, R600_BLEND_TYPE, R600_BLEND); + if (r) + return r; - rstate = radeon_state(rscreen->rw, R600_BLEND_TYPE, R600_BLEND); - if (rstate == NULL) - return NULL; rstate->states[R600_BLEND__CB_BLEND_RED] = fui(rctx->blend_color.color[0]); rstate->states[R600_BLEND__CB_BLEND_GREEN] = fui(rctx->blend_color.color[1]); rstate->states[R600_BLEND__CB_BLEND_BLUE] = fui(rctx->blend_color.color[2]); @@ -656,26 +657,23 @@ static struct radeon_state *r600_blend(struct r600_context *rctx) rstate->states[R600_BLEND__CB_BLEND_CONTROL] = bc; } - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + return radeon_state_pm4(rstate); } -static struct radeon_state *r600_cb0(struct r600_context *rctx) +static int r600_cb0(struct r600_context *rctx, struct radeon_state *rstate) { struct r600_screen *rscreen = rctx->screen; struct r600_resource_texture *rtex; struct r600_resource *rbuffer; - struct radeon_state *rstate; const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer; unsigned level = state->cbufs[0]->level; unsigned pitch, slice; + int r; + + r = radeon_state_init(rstate, rscreen->rw, R600_CB0_TYPE, R600_CB0); + if (r) + return r; - rstate = radeon_state(rscreen->rw, R600_CB0_TYPE, R600_CB0); - if (rstate == NULL) - return NULL; rtex = (struct r600_resource_texture*)state->cbufs[0]->texture; rbuffer = &rtex->resource; rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); @@ -695,11 +693,7 @@ static struct radeon_state *r600_cb0(struct r600_context *rctx) rstate->states[R600_CB0__CB_COLOR0_FRAG] = 0x00000000; rstate->states[R600_CB0__CB_COLOR0_TILE] = 0x00000000; rstate->states[R600_CB0__CB_COLOR0_MASK] = 0x00000000; - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + return radeon_state_pm4(rstate); } static int r600_db_format(unsigned pformat, unsigned *format) @@ -718,23 +712,22 @@ static int r600_db_format(unsigned pformat, unsigned *format) } } -static struct radeon_state *r600_db(struct r600_context *rctx) +static int r600_db(struct r600_context *rctx, struct radeon_state *rstate) { struct r600_screen *rscreen = rctx->screen; struct r600_resource_texture *rtex; struct r600_resource *rbuffer; - struct radeon_state *rstate; const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer; unsigned level = state->cbufs[0]->level; unsigned pitch, slice, format; + int r; - if (state->zsbuf == NULL) - return NULL; - - rstate = radeon_state(rscreen->rw, R600_DB_TYPE, R600_DB); - if (rstate == NULL) - return NULL; + r = radeon_state_init(rstate, rscreen->rw, R600_DB_TYPE, R600_DB); + if (r) + return r; + if (state->zsbuf == NULL) + return 0; rtex = (struct r600_resource_texture*)state->zsbuf->texture; rbuffer = &rtex->resource; rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); @@ -744,8 +737,7 @@ static struct radeon_state *r600_db(struct r600_context *rctx) pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; if (r600_db_format(state->zsbuf->texture->format, &format)) { - radeon_state_decref(rstate); - return NULL; + return -EINVAL; } rstate->states[R600_DB__DB_DEPTH_BASE] = 0x00000000; rstate->states[R600_DB__DB_DEPTH_INFO] = 0x00010000 | @@ -754,23 +746,22 @@ static struct radeon_state *r600_db(struct r600_context *rctx) rstate->states[R600_DB__DB_PREFETCH_LIMIT] = (state->zsbuf->height / 8) -1; rstate->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice); - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + return radeon_state_pm4(rstate); } -static struct radeon_state *r600_rasterizer(struct r600_context *rctx) +static int r600_rasterizer(struct r600_context *rctx, struct radeon_state *rstate) { const struct pipe_rasterizer_state *state = &rctx->rasterizer->state.rasterizer; const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate; float offset_units = 0, offset_scale = 0; char depth = 0; unsigned offset_db_fmt_cntl = 0; + int r; + r = radeon_state_init(rstate, rscreen->rw, R600_RASTERIZER_TYPE, R600_RASTERIZER); + if (r) + return r; if (fb->zsbuf) { offset_units = state->offset_units; offset_scale = state->offset_scale * 12.0f; @@ -791,15 +782,12 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) break; default: R600_ERR("unsupported %d\n", fb->zsbuf->texture->format); - return NULL; + return -EINVAL; } } offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth); rctx->flat_shade = state->flatshade; - rstate = radeon_state(rscreen->rw, R600_RASTERIZER_TYPE, R600_RASTERIZER); - if (rstate == NULL) - return NULL; rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000001; rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0x00000000; rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = 0x00080000 | @@ -827,25 +815,21 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET] = fui(offset_units); rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE] = fui(offset_scale); rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET] = fui(offset_units); - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + return radeon_state_pm4(rstate); } -static struct radeon_state *r600_scissor(struct r600_context *rctx) +static int r600_scissor(struct r600_context *rctx, struct radeon_state *rstate) { const struct pipe_scissor_state *state = &rctx->scissor->state.scissor; struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate; u32 tl, br; + int r; + r = radeon_state_init(rstate, rscreen->rw, R600_SCISSOR_TYPE, R600_SCISSOR); + if (r) + return r; tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) | S_028240_WINDOW_OFFSET_DISABLE(1); br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); - rstate = radeon_state(rscreen->rw, R600_SCISSOR_TYPE, R600_SCISSOR); - if (rstate == NULL) - return NULL; rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = tl; rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = br; rstate->states[R600_SCISSOR__PA_SC_WINDOW_OFFSET] = 0x00000000; @@ -865,22 +849,18 @@ static struct radeon_state *r600_scissor(struct r600_context *rctx) rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = br; rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = tl; rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = br; - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + return radeon_state_pm4(rstate); } -static struct radeon_state *r600_viewport(struct r600_context *rctx) +static int r600_viewport(struct r600_context *rctx, struct radeon_state *rstate) { const struct pipe_viewport_state *state = &rctx->viewport->state.viewport; struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate; + int r; - rstate = radeon_state(rscreen->rw, R600_VIEWPORT_TYPE, R600_VIEWPORT); - if (rstate == NULL) - return NULL; + r = radeon_state_init(rstate, rscreen->rw, R600_VIEWPORT_TYPE, R600_VIEWPORT); + if (r) + return r; rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMIN_0] = 0x00000000; rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui(state->scale[0]); @@ -890,33 +870,29 @@ static struct radeon_state *r600_viewport(struct r600_context *rctx) rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui(state->translate[1]); rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = fui(state->translate[2]); rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + return radeon_state_pm4(rstate); } -static struct radeon_state *r600_dsa(struct r600_context *rctx) +static int r600_dsa(struct r600_context *rctx, struct radeon_state *rstate) { const struct pipe_depth_stencil_alpha_state *state = &rctx->dsa->state.dsa; const struct pipe_stencil_ref *stencil_ref = &rctx->stencil_ref->state.stencil_ref; struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate; unsigned db_depth_control, alpha_test_control, alpha_ref; unsigned stencil_ref_mask, stencil_ref_mask_bf; - - rstate = radeon_state(rscreen->rw, R600_DSA_TYPE, R600_DSA); - if (rstate == NULL) - return NULL; + int r; + + r = radeon_state_init(rstate, rscreen->rw, R600_DSA_TYPE, R600_DSA); + if (r) + return r; stencil_ref_mask = 0; stencil_ref_mask_bf = 0; db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | S_028800_Z_WRITE_ENABLE(state->depth.writemask) | S_028800_ZFUNC(state->depth.func); - /* set stencil enable */ + /* set stencil enable */ if (state->stencil[0].enabled) { db_depth_control |= S_028800_STENCIL_ENABLE(1); db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)); @@ -963,11 +939,7 @@ static struct radeon_state *r600_dsa(struct r600_context *rctx) rstate->states[R600_DSA__DB_SRESULTS_COMPARE_STATE1] = 0x00000000; rstate->states[R600_DSA__DB_PRELOAD_CONTROL] = 0x00000000; rstate->states[R600_DSA__DB_ALPHA_TO_MASK] = 0x0000AA00; - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + return radeon_state_pm4(rstate); } static inline unsigned r600_tex_wrap(unsigned wrap) @@ -1045,16 +1017,15 @@ static INLINE u32 S_FIXED(float value, u32 frac_bits) return value * (1 << frac_bits); } -static struct radeon_state *r600_sampler(struct r600_context *rctx, - const struct pipe_sampler_state *state, - unsigned id) +static int r600_sampler(struct r600_context *rctx, struct radeon_state *rstate, + const struct pipe_sampler_state *state, unsigned id) { struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate; + int r; - rstate = radeon_state(rscreen->rw, R600_PS_SAMPLER_TYPE, id); - if (rstate == NULL) - return NULL; + r = radeon_state_init(rstate, rscreen->rw, R600_PS_SAMPLER_TYPE, id); + if (r) + return r; rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0] = S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | @@ -1069,11 +1040,7 @@ static struct radeon_state *r600_sampler(struct r600_context *rctx, S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)); rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0] = S_03C008_TYPE(1); - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + return radeon_state_pm4(rstate); } static inline unsigned r600_tex_swizzle(unsigned swizzle) @@ -1123,27 +1090,25 @@ static inline unsigned r600_tex_dim(unsigned dim) } } -static struct radeon_state *r600_resource(struct r600_context *rctx, - const struct pipe_sampler_view *view, - unsigned id) +static int r600_resource(struct r600_context *rctx, struct radeon_state *rstate, + const struct pipe_sampler_view *view, unsigned id) { struct r600_screen *rscreen = rctx->screen; const struct util_format_description *desc; struct r600_resource_texture *tmp; struct r600_resource *rbuffer; - struct radeon_state *rstate; unsigned format; + int r; + r = radeon_state_init(rstate, rscreen->rw, R600_PS_RESOURCE_TYPE, id); + if (r) + return r; if (r600_conv_pipe_format(view->texture->format, &format)) - return NULL; + return -EINVAL; desc = util_format_description(view->texture->format); if (desc == NULL) { R600_ERR("unknow format %d\n", view->texture->format); - return NULL; - } - rstate = radeon_state(rscreen->rw, R600_PS_RESOURCE_TYPE, id); - if (rstate == NULL) { - return NULL; + return -EINVAL; } tmp = (struct r600_resource_texture*)view->texture; rbuffer = &tmp->resource; @@ -1185,20 +1150,15 @@ static struct radeon_state *r600_resource(struct r600_context *rctx, S_038014_LAST_ARRAY(0); rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE); - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + return radeon_state_pm4(rstate); } -static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) +static int r600_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate) { struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate; const struct pipe_blend_state *pbs = &rctx->blend->state.blend; uint32_t color_control, target_mask; - int i; + int i, r; target_mask = 0; color_control = S_028808_PER_MRT_BLEND(1); @@ -1215,7 +1175,9 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) target_mask |= (pbs->rt[0].colormask << (4 * i)); } } - rstate = radeon_state(rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); + r = radeon_state_init(rstate, rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); + if (r) + return r; rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = 0x0000000F; rstate->states[R600_CB_CNTL__CB_TARGET_MASK] = target_mask; rstate->states[R600_CB_CNTL__CB_COLOR_CONTROL] = color_control; @@ -1227,11 +1189,7 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) rstate->states[R600_CB_CNTL__CB_CLRCMP_DST] = 0x000000FF; rstate->states[R600_CB_CNTL__CB_CLRCMP_MSK] = 0xFFFFFFFF; rstate->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF; - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + return radeon_state_pm4(rstate); } int r600_context_hw_states(struct r600_context *rctx) @@ -1242,91 +1200,74 @@ int r600_context_hw_states(struct r600_context *rctx) /* free previous TODO determine what need to be updated, what * doesn't */ - //radeon_state_decref(rctx->hw_states.config); - radeon_state_decref(rctx->hw_states.cb_cntl); - radeon_state_decref(rctx->hw_states.db); - radeon_state_decref(rctx->hw_states.rasterizer); - radeon_state_decref(rctx->hw_states.scissor); - radeon_state_decref(rctx->hw_states.dsa); - radeon_state_decref(rctx->hw_states.blend); - radeon_state_decref(rctx->hw_states.viewport); - radeon_state_decref(rctx->hw_states.cb0); - for (i = 0; i < rctx->hw_states.ps_nresource; i++) { - radeon_state_decref(rctx->hw_states.ps_resource[i]); - rctx->hw_states.ps_resource[i] = NULL; - } - rctx->hw_states.ps_nresource = 0; - for (i = 0; i < rctx->hw_states.ps_nsampler; i++) { - radeon_state_decref(rctx->hw_states.ps_sampler[i]); - rctx->hw_states.ps_sampler[i] = NULL; - } - rctx->hw_states.ps_nsampler = 0; + memset(&rctx->hw_states, 0, sizeof(struct r600_context_hw_states)); /* build new states */ - rctx->hw_states.rasterizer = r600_rasterizer(rctx); - rctx->hw_states.scissor = r600_scissor(rctx); - rctx->hw_states.dsa = r600_dsa(rctx); - rctx->hw_states.blend = r600_blend(rctx); - rctx->hw_states.viewport = r600_viewport(rctx); - rctx->hw_states.cb0 = r600_cb0(rctx); - rctx->hw_states.db = r600_db(rctx); - rctx->hw_states.cb_cntl = r600_cb_cntl(rctx); + rctx->hw_states.config = rctx->config; + r600_rasterizer(rctx, &rctx->hw_states.rasterizer); + r600_scissor(rctx, &rctx->hw_states.scissor); + r600_dsa(rctx, &rctx->hw_states.dsa); + r600_blend(rctx, &rctx->hw_states.blend); + r600_viewport(rctx, &rctx->hw_states.viewport); + r600_cb0(rctx, &rctx->hw_states.cb0); + r600_db(rctx, &rctx->hw_states.db); + r600_cb_cntl(rctx, &rctx->hw_states.cb_cntl); for (i = 0; i < rctx->ps_nsampler; i++) { if (rctx->ps_sampler[i]) { - rctx->hw_states.ps_sampler[i] = r600_sampler(rctx, - &rctx->ps_sampler[i]->state.sampler, - R600_PS_SAMPLER + i); + r600_sampler(rctx, &rctx->hw_states.ps_sampler[i], + &rctx->ps_sampler[i]->state.sampler, + R600_PS_SAMPLER + i); } } rctx->hw_states.ps_nsampler = rctx->ps_nsampler; for (i = 0; i < rctx->ps_nsampler_view; i++) { if (rctx->ps_sampler_view[i]) { - rctx->hw_states.ps_resource[i] = r600_resource(rctx, - &rctx->ps_sampler_view[i]->state.sampler_view, - R600_PS_RESOURCE + i); + r600_resource(rctx, &rctx->hw_states.ps_resource[i], + &rctx->ps_sampler_view[i]->state.sampler_view, + R600_PS_RESOURCE + i); } } rctx->hw_states.ps_nresource = rctx->ps_nsampler_view; /* bind states */ - r = radeon_draw_set(rctx->draw, rctx->hw_states.db); + r = radeon_draw_set(&rctx->draw, &rctx->hw_states.db); if (r) return r; - r = radeon_draw_set(rctx->draw, rctx->hw_states.rasterizer); + r = radeon_draw_set(&rctx->draw, &rctx->hw_states.rasterizer); if (r) return r; - r = radeon_draw_set(rctx->draw, rctx->hw_states.scissor); + r = radeon_draw_set(&rctx->draw, &rctx->hw_states.scissor); if (r) return r; - r = radeon_draw_set(rctx->draw, rctx->hw_states.dsa); + r = radeon_draw_set(&rctx->draw, &rctx->hw_states.dsa); if (r) return r; - r = radeon_draw_set(rctx->draw, rctx->hw_states.blend); + r = radeon_draw_set(&rctx->draw, &rctx->hw_states.blend); if (r) return r; - r = radeon_draw_set(rctx->draw, rctx->hw_states.viewport); + r = radeon_draw_set(&rctx->draw, &rctx->hw_states.viewport); if (r) return r; - r = radeon_draw_set(rctx->draw, rctx->hw_states.cb0); + r = radeon_draw_set(&rctx->draw, &rctx->hw_states.cb0); if (r) return r; - r = radeon_draw_set(rctx->draw, rctx->hw_states.config); + r = radeon_draw_set(&rctx->draw, &rctx->hw_states.config); if (r) return r; - r = radeon_draw_set(rctx->draw, rctx->hw_states.cb_cntl); + r = radeon_draw_set(&rctx->draw, &rctx->hw_states.cb_cntl); if (r) return r; for (i = 0; i < rctx->hw_states.ps_nresource; i++) { - if (rctx->hw_states.ps_resource[i]) { - r = radeon_draw_set(rctx->draw, rctx->hw_states.ps_resource[i]); + if (rctx->hw_states.ps_resource[i].valid) { + r = radeon_draw_set(&rctx->draw, &rctx->hw_states.ps_resource[i]); if (r) return r; } } for (i = 0; i < rctx->hw_states.ps_nsampler; i++) { - if (rctx->hw_states.ps_sampler[i]) { - r = radeon_draw_set(rctx->draw, rctx->hw_states.ps_sampler[i]); + if (rctx->hw_states.ps_sampler[i].valid) { + r = radeon_draw_set(&rctx->draw, &rctx->hw_states.ps_sampler[i]); if (r) return r; } diff --git a/src/gallium/drivers/r600/radeon.h b/src/gallium/drivers/r600/radeon.h index 3a8405f9b4..709ef8a85a 100644 --- a/src/gallium/drivers/r600/radeon.h +++ b/src/gallium/drivers/r600/radeon.h @@ -103,17 +103,17 @@ int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo); */ struct radeon_state { struct radeon *radeon; - unsigned refcount; + unsigned valid; unsigned type; unsigned id; unsigned nstates; - u32 *states; + u32 states[64]; unsigned npm4; unsigned cpm4; u32 pm4_crc; - u32 *pm4; + u32 pm4[128]; u32 nimmd; - u32 *immd; + u32 immd[64]; unsigned nbo; struct radeon_bo *bo[4]; unsigned nreloc; @@ -123,35 +123,51 @@ struct radeon_state { unsigned bo_dirty[4]; }; -struct radeon_state *radeon_state(struct radeon *radeon, u32 type, u32 id); -struct radeon_state *radeon_state_incref(struct radeon_state *state); -struct radeon_state *radeon_state_decref(struct radeon_state *state); +int radeon_state_init(struct radeon_state *state, struct radeon *radeon, u32 type, u32 id); int radeon_state_pm4(struct radeon_state *state); /* * draw functions */ struct radeon_draw { - unsigned refcount; struct radeon *radeon; unsigned nstate; - struct radeon_state **state; + struct radeon_state state[1273]; unsigned cpm4; }; -struct radeon_draw *radeon_draw(struct radeon *radeon); -struct radeon_draw *radeon_draw_duplicate(struct radeon_draw *draw); -struct radeon_draw *radeon_draw_incref(struct radeon_draw *draw); -struct radeon_draw *radeon_draw_decref(struct radeon_draw *draw); +int radeon_draw_init(struct radeon_draw *draw, struct radeon *radeon); int radeon_draw_set(struct radeon_draw *draw, struct radeon_state *state); -int radeon_draw_set_new(struct radeon_draw *draw, struct radeon_state *state); int radeon_draw_check(struct radeon_draw *draw); -struct radeon_ctx *radeon_ctx(struct radeon *radeon); -struct radeon_ctx *radeon_ctx_decref(struct radeon_ctx *ctx); -struct radeon_ctx *radeon_ctx_incref(struct radeon_ctx *ctx); +/* + * Context + */ +#pragma pack(1) +struct radeon_cs_reloc { + uint32_t handle; + uint32_t read_domain; + uint32_t write_domain; + uint32_t flags; +}; +#pragma pack() + +struct radeon_ctx { + struct radeon *radeon; + u32 *pm4; + u32 cpm4; + u32 draw_cpm4; + unsigned id; + unsigned nreloc; + struct radeon_cs_reloc reloc[2048]; + unsigned nbo; + struct radeon_bo *bo[2048]; + unsigned ndraw; + struct radeon_draw draw[128]; +}; + +int radeon_ctx_init(struct radeon_ctx *ctx, struct radeon *radeon); int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw); -int radeon_ctx_set_draw_new(struct radeon_ctx *ctx, struct radeon_draw *draw); int radeon_ctx_pm4(struct radeon_ctx *ctx); int radeon_ctx_submit(struct radeon_ctx *ctx); void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file); diff --git a/src/gallium/targets/dri-r600/Makefile b/src/gallium/targets/dri-r600/Makefile index 932303d194..9c8b4ab252 100644 --- a/src/gallium/targets/dri-r600/Makefile +++ b/src/gallium/targets/dri-r600/Makefile @@ -4,12 +4,12 @@ include $(TOP)/configs/current LIBNAME = r600_dri.so PIPE_DRIVERS = \ + $(TOP)/src/gallium/drivers/r600/libr600.a \ $(TOP)/src/gallium/state_trackers/dri/drm/libdridrm.a \ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ - $(TOP)/src/gallium/drivers/rbug/librbug.a \ - $(TOP)/src/gallium/drivers/r600/libr600.a + $(TOP)/src/gallium/drivers/rbug/librbug.a C_SOURCES = \ target.c \ diff --git a/src/gallium/winsys/r600/drm/radeon.c b/src/gallium/winsys/r600/drm/radeon.c index 7e65669806..24d821d5cf 100644 --- a/src/gallium/winsys/r600/drm/radeon.c +++ b/src/gallium/winsys/r600/drm/radeon.c @@ -43,16 +43,6 @@ static int radeon_get_device(struct radeon *radeon) return r; } -/* symbol missing drove me crazy hack to get symbol exported */ -static void fake(void) -{ - struct radeon_ctx *ctx; - struct radeon_draw *draw; - - ctx = radeon_ctx(NULL); - draw = radeon_draw(NULL); -} - struct radeon *radeon_new(int fd, unsigned device) { struct radeon *radeon; @@ -60,7 +50,6 @@ struct radeon *radeon_new(int fd, unsigned device) radeon = calloc(1, sizeof(*radeon)); if (radeon == NULL) { - fake(); return NULL; } radeon->fd = fd; diff --git a/src/gallium/winsys/r600/drm/radeon_ctx.c b/src/gallium/winsys/r600/drm/radeon_ctx.c index 6b0eba0b28..af270d5d20 100644 --- a/src/gallium/winsys/r600/drm/radeon_ctx.c +++ b/src/gallium/winsys/r600/drm/radeon_ctx.c @@ -32,13 +32,8 @@ int radeon_ctx_set_bo_new(struct radeon_ctx *ctx, struct radeon_bo *bo) { - void *ptr; - - ptr = realloc(ctx->bo, sizeof(struct radeon_bo) * (ctx->nbo + 1)); - if (ptr == NULL) { - return -ENOMEM; - } - ctx->bo = ptr; + if (ctx->nbo >= 2048) + return -EBUSY; ctx->bo[ctx->nbo] = bo; ctx->nbo++; return 0; @@ -76,49 +71,26 @@ void radeon_ctx_get_placement(struct radeon_ctx *ctx, unsigned reloc, u32 *place } } -struct radeon_ctx *radeon_ctx(struct radeon *radeon) -{ - struct radeon_ctx *ctx; - - if (radeon == NULL) - return NULL; - ctx = calloc(1, sizeof(*ctx)); - if (ctx == NULL) - return NULL; - ctx->radeon = radeon_incref(radeon); - return ctx; -} - -struct radeon_ctx *radeon_ctx_incref(struct radeon_ctx *ctx) +static void radeon_ctx_clear(struct radeon_ctx *ctx) { - ctx->refcount++; - return ctx; + ctx->draw_cpm4 = 0; + ctx->cpm4 = 0; + ctx->ndraw = 0; + ctx->nbo = 0; + ctx->nreloc = 0; } -struct radeon_ctx *radeon_ctx_decref(struct radeon_ctx *ctx) +int radeon_ctx_init(struct radeon_ctx *ctx, struct radeon *radeon) { - unsigned i; - - if (ctx == NULL) - return NULL; - if (--ctx->refcount > 0) { - return NULL; - } - - for (i = 0; i < ctx->ndraw; i++) { - ctx->draw[i] = radeon_draw_decref(ctx->draw[i]); - } - for (i = 0; i < ctx->nbo; i++) { - ctx->bo[i] = radeon_bo_decref(ctx->radeon, ctx->bo[i]); - } - ctx->radeon = radeon_decref(ctx->radeon); - free(ctx->draw); - free(ctx->bo); + memset(ctx, 0, sizeof(struct radeon_ctx)); + ctx->radeon = radeon_incref(radeon); + radeon_ctx_clear(ctx); free(ctx->pm4); - free(ctx->reloc); - memset(ctx, 0, sizeof(*ctx)); - free(ctx); - return NULL; + ctx->cpm4 = 0; + ctx->pm4 = malloc(64 * 1024); + if (ctx->pm4 == NULL) + return -ENOMEM; + return 0; } static int radeon_ctx_state_bo(struct radeon_ctx *ctx, struct radeon_state *state) @@ -143,7 +115,6 @@ static int radeon_ctx_state_bo(struct radeon_ctx *ctx, struct radeon_state *stat return 0; } - int radeon_ctx_submit(struct radeon_ctx *ctx) { struct drm_radeon_cs drmib; @@ -170,6 +141,7 @@ int radeon_ctx_submit(struct radeon_ctx *ctx) r = drmCommandWriteRead(ctx->radeon->fd, DRM_RADEON_CS, &drmib, sizeof(struct drm_radeon_cs)); #endif + radeon_ctx_clear(ctx); return r; } @@ -177,7 +149,6 @@ static int radeon_ctx_reloc(struct radeon_ctx *ctx, struct radeon_bo *bo, unsigned id, unsigned *placement) { unsigned i; - struct radeon_cs_reloc *ptr; for (i = 0; i < ctx->nreloc; i++) { if (ctx->reloc[i].handle == bo->handle) { @@ -185,14 +156,12 @@ static int radeon_ctx_reloc(struct radeon_ctx *ctx, struct radeon_bo *bo, return 0; } } - ptr = realloc(ctx->reloc, sizeof(struct radeon_cs_reloc) * (ctx->nreloc + 1)); - if (ptr == NULL) - return -ENOMEM; - ctx->reloc = ptr; - ptr[ctx->nreloc].handle = bo->handle; - ptr[ctx->nreloc].read_domain = placement[0] | placement [1]; - ptr[ctx->nreloc].write_domain = placement[0] | placement [1]; - ptr[ctx->nreloc].flags = 0; + if (ctx->nreloc >= 2048) + return -EINVAL; + ctx->reloc[ctx->nreloc].handle = bo->handle; + ctx->reloc[ctx->nreloc].read_domain = placement[0] | placement [1]; + ctx->reloc[ctx->nreloc].write_domain = placement[0] | placement [1]; + ctx->reloc[ctx->nreloc].flags = 0; ctx->pm4[id] = ctx->nreloc * sizeof(struct radeon_cs_reloc) / 4; ctx->nreloc++; return 0; @@ -221,21 +190,13 @@ static int radeon_ctx_state_schedule(struct radeon_ctx *ctx, struct radeon_state return 0; } -int radeon_ctx_set_draw_new(struct radeon_ctx *ctx, struct radeon_draw *draw) +int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw) { - struct radeon_draw *pdraw = NULL; - struct radeon_draw **ndraw; - struct radeon_state *nstate, *ostate; - unsigned cpm4, i, cstate; - void *tmp; + unsigned cpm4, i; int r = 0; - ndraw = realloc(ctx->draw, sizeof(void*) * (ctx->ndraw + 1)); - if (ndraw == NULL) - return -ENOMEM; - ctx->draw = ndraw; for (i = 0; i < draw->nstate; i++) { - r = radeon_ctx_state_bo(ctx, draw->state[i]); + r = radeon_ctx_state_bo(ctx, &draw->state[i]); if (r) return r; } @@ -247,69 +208,48 @@ int radeon_ctx_set_draw_new(struct radeon_ctx *ctx, struct radeon_draw *draw) __func__, draw->cpm4, RADEON_CTX_MAX_PM4); return -EINVAL; } - tmp = realloc(ctx->state, (ctx->nstate + draw->nstate) * sizeof(void*)); - if (tmp == NULL) - return -ENOMEM; - ctx->state = tmp; - pdraw = ctx->cdraw; - for (i = 0, cpm4 = 0, cstate = ctx->nstate; i < draw->nstate - 1; i++) { - nstate = draw->state[i]; - if (nstate) { - if (pdraw && pdraw->state[i]) { - ostate = pdraw->state[i]; - if (ostate->pm4_crc != nstate->pm4_crc) { - ctx->state[cstate++] = nstate; - cpm4 += nstate->cpm4; - } - } else { - ctx->state[cstate++] = nstate; - cpm4 += nstate->cpm4; + ctx->draw[ctx->ndraw] = *draw; + for (i = 0, cpm4 = 0; i < draw->nstate - 1; i++) { + ctx->draw[ctx->ndraw].state[i].valid &= ~2; + if (ctx->draw[ctx->ndraw].state[i].valid) { + if (ctx->ndraw > 1 && ctx->draw[ctx->ndraw - 1].state[i].valid) { + if (ctx->draw[ctx->ndraw - 1].state[i].pm4_crc == draw->state[i].pm4_crc) + continue; } + ctx->draw[ctx->ndraw].state[i].valid |= 2; + cpm4 += ctx->draw[ctx->ndraw].state[i].cpm4; } } /* The last state is the draw state always add it */ - if (draw->state[i] == NULL) { + if (!draw->state[i].valid) { fprintf(stderr, "%s no draw command\n", __func__); return -EINVAL; } - ctx->state[cstate++] = draw->state[i]; - cpm4 += draw->state[i]->cpm4; + ctx->draw[ctx->ndraw].state[i].valid |= 2; + cpm4 += ctx->draw[ctx->ndraw].state[i].cpm4; if ((ctx->draw_cpm4 + cpm4) > RADEON_CTX_MAX_PM4) { /* need to flush */ return -EBUSY; } ctx->draw_cpm4 += cpm4; - ctx->nstate = cstate; - ctx->draw[ctx->ndraw++] = draw; - ctx->cdraw = draw; + ctx->ndraw++; return 0; } -int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw) -{ - int r; - - radeon_draw_incref(draw); - r = radeon_ctx_set_draw_new(ctx, draw); - if (r) - radeon_draw_decref(draw); - return r; -} - int radeon_ctx_pm4(struct radeon_ctx *ctx) { - unsigned i; + unsigned i, j, c; int r; - free(ctx->pm4); - ctx->cpm4 = 0; - ctx->pm4 = malloc(ctx->draw_cpm4 * 4); - if (ctx->pm4 == NULL) - return -EINVAL; - for (i = 0, ctx->id = 0; i < ctx->nstate; i++) { - r = radeon_ctx_state_schedule(ctx, ctx->state[i]); - if (r) - return r; + for (i = 0, c = 0, ctx->id = 0; i < ctx->ndraw; i++) { + for (j = 0; j < ctx->draw[i].nstate; j++) { + if (ctx->draw[i].state[j].valid & 2) { + r = radeon_ctx_state_schedule(ctx, &ctx->draw[i].state[j]); + if (r) + return r; + c += ctx->draw[i].state[j].cpm4; + } + } } if (ctx->id != ctx->draw_cpm4) { fprintf(stderr, "%s miss predicted pm4 size %d for %d\n", diff --git a/src/gallium/winsys/r600/drm/radeon_draw.c b/src/gallium/winsys/r600/drm/radeon_draw.c index 4413ed79fb..53699eb0b1 100644 --- a/src/gallium/winsys/r600/drm/radeon_draw.c +++ b/src/gallium/winsys/r600/drm/radeon_draw.c @@ -31,111 +31,33 @@ /* * draw functions */ -struct radeon_draw *radeon_draw(struct radeon *radeon) +int radeon_draw_init(struct radeon_draw *draw, struct radeon *radeon) { - struct radeon_draw *draw; - - draw = calloc(1, sizeof(*draw)); - if (draw == NULL) - return NULL; + memset(draw, 0, sizeof(struct radeon_draw)); draw->nstate = radeon->nstate; draw->radeon = radeon; - draw->refcount = 1; - draw->state = calloc(1, sizeof(void*) * draw->nstate); - if (draw->state == NULL) { - free(draw); - return NULL; - } - return draw; -} - -struct radeon_draw *radeon_draw_incref(struct radeon_draw *draw) -{ - draw->refcount++; - return draw; -} - -struct radeon_draw *radeon_draw_decref(struct radeon_draw *draw) -{ - unsigned i; - - if (draw == NULL) - return NULL; - if (--draw->refcount > 0) - return NULL; - for (i = 0; i < draw->nstate; i++) { - draw->state[i] = radeon_state_decref(draw->state[i]); - } - free(draw->state); - memset(draw, 0, sizeof(*draw)); - free(draw); - return NULL; + return 0; } -int radeon_draw_set_new(struct radeon_draw *draw, struct radeon_state *state) +int radeon_draw_set(struct radeon_draw *draw, struct radeon_state *state) { if (state == NULL) return 0; if (state->type >= draw->radeon->ntype) return -EINVAL; - draw->state[state->id] = radeon_state_decref(draw->state[state->id]); - draw->state[state->id] = state; + draw->state[state->id] = *state; return 0; } -int radeon_draw_set(struct radeon_draw *draw, struct radeon_state *state) -{ - if (state == NULL) - return 0; - radeon_state_incref(state); - return radeon_draw_set_new(draw, state); -} - int radeon_draw_check(struct radeon_draw *draw) { unsigned i; int r; - r = radeon_draw_pm4(draw); - if (r) - return r; for (i = 0, draw->cpm4 = 0; i < draw->nstate; i++) { - if (draw->state[i]) { - draw->cpm4 += draw->state[i]->cpm4; + if (draw->state[i].valid) { + draw->cpm4 += draw->state[i].cpm4; } } return 0; } - -struct radeon_draw *radeon_draw_duplicate(struct radeon_draw *draw) -{ - struct radeon_draw *ndraw; - unsigned i; - - if (draw == NULL) - return NULL; - ndraw = radeon_draw(draw->radeon); - if (ndraw == NULL) { - return NULL; - } - for (i = 0; i < draw->nstate; i++) { - if (radeon_draw_set(ndraw, draw->state[i])) { - radeon_draw_decref(ndraw); - return NULL; - } - } - return ndraw; -} - -int radeon_draw_pm4(struct radeon_draw *draw) -{ - unsigned i; - int r; - - for (i = 0; i < draw->nstate; i++) { - r = radeon_state_pm4(draw->state[i]); - if (r) - return r; - } - return 0; -} diff --git a/src/gallium/winsys/r600/drm/radeon_priv.h b/src/gallium/winsys/r600/drm/radeon_priv.h index b91421f438..80392cda96 100644 --- a/src/gallium/winsys/r600/drm/radeon_priv.h +++ b/src/gallium/winsys/r600/drm/radeon_priv.h @@ -24,7 +24,6 @@ #include "radeon.h" struct radeon; -struct radeon_ctx; /* * radeon functions @@ -71,34 +70,6 @@ extern unsigned radeon_type_from_id(struct radeon *radeon, unsigned id); /* * radeon context functions */ -#pragma pack(1) -struct radeon_cs_reloc { - uint32_t handle; - uint32_t read_domain; - uint32_t write_domain; - uint32_t flags; -}; -#pragma pack() - -struct radeon_ctx { - int refcount; - struct radeon *radeon; - u32 *pm4; - u32 cpm4; - u32 draw_cpm4; - unsigned id; - unsigned next_id; - unsigned nreloc; - struct radeon_cs_reloc *reloc; - unsigned nbo; - struct radeon_bo **bo; - unsigned ndraw; - struct radeon_draw *cdraw; - struct radeon_draw **draw; - unsigned nstate; - struct radeon_state **state; -}; - int radeon_ctx_set_bo_new(struct radeon_ctx *ctx, struct radeon_bo *bo); struct radeon_bo *radeon_ctx_get_bo(struct radeon_ctx *ctx, unsigned reloc); void radeon_ctx_get_placement(struct radeon_ctx *ctx, unsigned reloc, u32 *placement); diff --git a/src/gallium/winsys/r600/drm/radeon_state.c b/src/gallium/winsys/r600/drm/radeon_state.c index 308288557a..d7cd1d7a94 100644 --- a/src/gallium/winsys/r600/drm/radeon_state.c +++ b/src/gallium/winsys/r600/drm/radeon_state.c @@ -32,82 +32,23 @@ /* * state core functions */ -struct radeon_state *radeon_state(struct radeon *radeon, u32 type, u32 id) +int radeon_state_init(struct radeon_state *state, struct radeon *radeon, u32 type, u32 id) { - struct radeon_state *state; - if (type > radeon->ntype) { fprintf(stderr, "%s invalid type %d\n", __func__, type); - return NULL; + return -EINVAL; } if (id > radeon->nstate) { fprintf(stderr, "%s invalid state id %d\n", __func__, id); - return NULL; + return -EINVAL; } - state = calloc(1, sizeof(*state)); - if (state == NULL) - return NULL; + memset(state, 0, sizeof(struct radeon_state)); state->radeon = radeon; state->type = type; state->id = id; - state->refcount = 1; state->npm4 = radeon->type[type].npm4; state->nstates = radeon->type[type].nstates; - state->states = calloc(1, state->nstates * 4); - state->pm4 = calloc(1, radeon->type[type].npm4 * 4); - if (state->states == NULL || state->pm4 == NULL) { - radeon_state_decref(state); - return NULL; - } - return state; -} - -struct radeon_state *radeon_state_duplicate(struct radeon_state *state) -{ - struct radeon_state *nstate = radeon_state(state->radeon, state->type, state->id); - unsigned i; - - if (state == NULL) - return NULL; - nstate->cpm4 = state->cpm4; - nstate->nbo = state->nbo; - nstate->nreloc = state->nreloc; - memcpy(nstate->states, state->states, state->nstates * 4); - memcpy(nstate->pm4, state->pm4, state->npm4 * 4); - memcpy(nstate->placement, state->placement, 8 * 4); - memcpy(nstate->reloc_pm4_id, state->reloc_pm4_id, 8 * 4); - memcpy(nstate->reloc_bo_id, state->reloc_bo_id, 8 * 4); - memcpy(nstate->bo_dirty, state->bo_dirty, 4 * 4); - for (i = 0; i < state->nbo; i++) { - nstate->bo[i] = radeon_bo_incref(state->radeon, state->bo[i]); - } - return nstate; -} - -struct radeon_state *radeon_state_incref(struct radeon_state *state) -{ - state->refcount++; - return state; -} - -struct radeon_state *radeon_state_decref(struct radeon_state *state) -{ - unsigned i; - - if (state == NULL) - return NULL; - if (--state->refcount > 0) { - return NULL; - } - for (i = 0; i < state->nbo; i++) { - state->bo[i] = radeon_bo_decref(state->radeon, state->bo[i]); - } - free(state->immd); - free(state->states); - free(state->pm4); - memset(state, 0, sizeof(*state)); - free(state); - return NULL; + return 0; } int radeon_state_replace_always(struct radeon_state *ostate, @@ -156,6 +97,7 @@ int radeon_state_pm4(struct radeon_state *state) return r; } state->pm4_crc = crc32(state->pm4, state->cpm4 * 4); + state->valid = 1; return 0; } -- cgit v1.2.3 From 3e231361f3291307bfaa27f1bbea13fecc3b2072 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Thu, 5 Aug 2010 13:07:41 +0200 Subject: nvfx: shut up unknown cap 64 warning --- src/gallium/drivers/nvfx/nvfx_screen.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index 80db28a07c..f2525ccb38 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -131,6 +131,8 @@ nvfx_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return screen->is_nv4x ? 1 : 0; case PIPE_CAP_GEOMETRY_SHADER4: return 0; + case PIPE_CAP_DEPTH_CLAMP: + return 0; // TODO: implement depth clamp default: NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); return 0; -- cgit v1.2.3 From bf5ee5aa4f065b6e17a4cc9cd2e539c4290ff98c Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Fri, 6 Aug 2010 00:58:48 +0200 Subject: nvfx: fix nv30 vertex program scalar opcodes Apparently they have always been broken, even before unification. Fixes a lot of stuff, starting from morph3d and lighting in teapot with textures disabled. --- src/gallium/drivers/nvfx/nvfx_vertprog.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c index 80b98b62d3..24d9846310 100644 --- a/src/gallium/drivers/nvfx/nvfx_vertprog.c +++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c @@ -299,7 +299,13 @@ nvfx_vp_arith(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, int slot, int op, (3 << NVFX_VP(INST_COND_SWZ_W_SHIFT))); if(!nvfx->is_nv4x) { - hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); + if(slot == 0) + hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); + else + { + hw[0] |= ((op >> 4) << NV30_VP_INST_SCA_OPCODEH_SHIFT); + hw[1] |= ((op & 0xf) << NV30_VP_INST_SCA_OPCODEL_SHIFT); + } // hw[3] |= NVFX_VP(INST_SCA_DEST_TEMP_MASK); // hw[3] |= (mask << NVFX_VP(INST_VEC_WRITEMASK_SHIFT)); -- cgit v1.2.3 From e00b17830bb3193d5a24f6a9dfabb996e02c1d74 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Thu, 5 Aug 2010 17:12:54 -0700 Subject: llvmpipe: Only get no rast option once --- src/gallium/drivers/llvmpipe/lp_context.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 3db4f12ebb..28793682ed 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -46,6 +46,10 @@ #include "lp_query.h" #include "lp_setup.h" + +DEBUG_GET_ONCE_BOOL_OPTION(lp_no_rast, "LP_NO_RAST", FALSE); + + static void llvmpipe_destroy( struct pipe_context *pipe ) { struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); @@ -130,7 +134,7 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) /* FIXME: devise alternative to draw_texture_samplers */ - if (debug_get_bool_option( "LP_NO_RAST", FALSE )) + if (debug_get_option_lp_no_rast()) llvmpipe->no_rast = TRUE; llvmpipe->setup = lp_setup_create( &llvmpipe->pipe, -- cgit v1.2.3 From 4d10ec4f18cfb6c386feb65805713584ff730652 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 6 Aug 2010 01:08:12 +0200 Subject: r300g: do not emit GB_Z_PEQ_CONFIG on non-r500 if DRM < 2.6.0 --- src/gallium/drivers/r300/r300_context.c | 7 +++++-- src/gallium/drivers/r300/r300_winsys.h | 1 + src/gallium/winsys/radeon/drm/radeon_drm.c | 4 ++++ src/gallium/winsys/radeon/drm/radeon_r300.c | 2 ++ src/gallium/winsys/radeon/drm/radeon_winsys.h | 6 ++++++ 5 files changed, 18 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 4658ab0ff5..e8b6c4f7af 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -171,6 +171,7 @@ static void r300_setup_atoms(struct r300_context* r300) boolean is_r500 = r300->screen->caps.is_r500; boolean has_tcl = r300->screen->caps.has_tcl; boolean drm_2_3_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); + boolean drm_2_6_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_6_0); boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); boolean has_hiz_ram = r300->screen->caps.hiz_ram > 0; @@ -195,7 +196,7 @@ static void r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(gpu_flush, 9); R300_INIT_ATOM(aa_state, 4); R300_INIT_ATOM(fb_state, 0); - R300_INIT_ATOM(hyperz_state, is_rv350 ? 10 : 8); + R300_INIT_ATOM(hyperz_state, is_r500 || (is_rv350 && drm_2_6_0) ? 10 : 8); /* ZB (unpipelined), SC. */ R300_INIT_ATOM(ztop_state, 2); /* ZB, FG. */ @@ -373,7 +374,9 @@ static void r300_init_states(struct pipe_context *pipe) OUT_CB_REG(R300_ZB_DEPTHCLEARVALUE, 0); OUT_CB_REG(R300_SC_HYPERZ, R300_SC_HYPERZ_ADJ_2); - if (r300->screen->caps.is_rv350) { + if (r300->screen->caps.is_r500 || + (r300->screen->caps.is_rv350 && + r300->rws->get_value(r300->rws, R300_VID_DRM_2_6_0))) { OUT_CB_REG(R300_GB_Z_PEQ_CONFIG, 0); } END_CB; diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index e7a1ede4fb..187780750f 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -49,6 +49,7 @@ enum r300_value_id { R300_VID_Z_PIPES, R300_VID_SQUARE_TILING_SUPPORT, R300_VID_DRM_2_3_0, + R300_VID_DRM_2_6_0, R300_CAN_HYPERZ, }; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm.c b/src/gallium/winsys/radeon/drm/radeon_drm.c index 3604827700..ecaf096dea 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm.c @@ -103,6 +103,10 @@ static void do_ioctls(int fd, struct radeon_libdrm_winsys* winsys) winsys->drm_2_3_0 = version->version_major > 2 || version->version_minor >= 3; + winsys->drm_2_6_0 = version->version_major > 2 || + (version->version_major == 2 && + version->version_minor >= 6); + info.request = RADEON_INFO_DEVICE_ID; retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); if (retval) { diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c index 955ae4c045..5840098642 100644 --- a/src/gallium/winsys/radeon/drm/radeon_r300.c +++ b/src/gallium/winsys/radeon/drm/radeon_r300.c @@ -211,6 +211,8 @@ static uint32_t radeon_get_value(struct r300_winsys_screen *rws, return ws->squaretiling; case R300_VID_DRM_2_3_0: return ws->drm_2_3_0; + case R300_VID_DRM_2_6_0: + return ws->drm_2_6_0; case R300_CAN_HYPERZ: return ws->hyperz; } diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 52db0d62d2..6f4aa4bce3 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -65,6 +65,12 @@ struct radeon_libdrm_winsys { */ boolean drm_2_3_0; + /* DRM 2.6.0 + * - Hyper-Z + * - GB_Z_PEQ_CONFIG allowed on rv350->r4xx, we should initialize it + */ + boolean drm_2_6_0; + /* hyperz user */ boolean hyperz; -- cgit v1.2.3 From 121a625c1651ddc181e374ebdf16bc5c46f6eaa9 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 6 Aug 2010 11:44:31 +1000 Subject: r600g: add bo wait after map. --- src/gallium/drivers/r600/r600_texture.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 033c71f6ef..1bce911306 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -240,6 +240,8 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, if (radeon_bo_map(rscreen->rw, resource->bo)) { return NULL; } + radeon_bo_wait(rscreen->rw, resource->bo); + map = resource->bo->data; return map + rtransfer->offset + -- cgit v1.2.3 From 2b9036476511edd549d1c2cea6044eef4652a19c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 6 Aug 2010 14:53:38 +1000 Subject: r600g: improve supported format selection. This fixes fbo-readpixels piglit test, and adds support for swapping the formats. Not all formats are correct yet I don't think. Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_draw.c | 6 +- src/gallium/drivers/r600/r600_helper.c | 99 ----------------- src/gallium/drivers/r600/r600_screen.c | 76 ++++++------- src/gallium/drivers/r600/r600_state.c | 15 ++- src/gallium/drivers/r600/r600_state_inlines.h | 153 ++++++++++++++++++++++++++ src/gallium/drivers/r600/r600d.h | 5 + 6 files changed, 212 insertions(+), 142 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c index 7130bf2fa8..3a54cee2d9 100644 --- a/src/gallium/drivers/r600/r600_draw.c +++ b/src/gallium/drivers/r600/r600_draw.c @@ -34,7 +34,7 @@ #include "r600_screen.h" #include "r600_context.h" #include "r600_resource.h" -#include "r600d.h" +#include "r600_state_inlines.h" struct r600_draw { struct pipe_context *ctx; @@ -100,9 +100,7 @@ static int r600_draw_common(struct r600_draw *draw) vertex_buffer = &rctx->vertex_buffer[j]; rbuffer = (struct r600_resource*)vertex_buffer->buffer; offset = rctx->vertex_elements->elements[i].src_offset + vertex_buffer->buffer_offset; - r = r600_conv_pipe_format(rctx->vertex_elements->elements[i].src_format, &format); - if (r) - return r; + format = r600_translate_colorformat(rctx->vertex_elements->elements[i].src_format); r = radeon_state_init(&vs_resource, rscreen->rw, R600_VS_RESOURCE_TYPE, R600_VS_RESOURCE + i); if (r) return r; diff --git a/src/gallium/drivers/r600/r600_helper.c b/src/gallium/drivers/r600/r600_helper.c index c672fe7386..5e0e0aab57 100644 --- a/src/gallium/drivers/r600/r600_helper.c +++ b/src/gallium/drivers/r600/r600_helper.c @@ -30,105 +30,6 @@ #include "r600_context.h" #include "r600d.h" -int r600_conv_pipe_format(unsigned pformat, unsigned *format) -{ - switch (pformat) { - case PIPE_FORMAT_R32G32B32_FLOAT: - *format = 0x30; - return 0; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - *format = V_0280A0_COLOR_32_32_32_32_FLOAT; - return 0; - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_X8R8G8B8_UNORM: - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_R8G8B8A8_UNORM: - case PIPE_FORMAT_R8G8B8X8_UNORM: - case PIPE_FORMAT_R8G8B8A8_USCALED: - case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_R8G8B8A8_SSCALED: - *format = V_0280A0_COLOR_8_8_8_8; - return 0; - case PIPE_FORMAT_R32_FLOAT: - *format = V_0280A0_COLOR_32_FLOAT; - return 0; - case PIPE_FORMAT_R32G32_FLOAT: - *format = V_0280A0_COLOR_32_32_FLOAT; - return 0; - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_I8_UNORM: - *format = V_0280A0_COLOR_8; - return 0; - case PIPE_FORMAT_B4G4R4A4_UNORM: - *format = V_0280A0_COLOR_4_4_4_4; - return 0; - case PIPE_FORMAT_B5G6R5_UNORM: - *format = V_0280A0_COLOR_5_6_5; - return 0; - case PIPE_FORMAT_L16_UNORM: - case PIPE_FORMAT_Z16_UNORM: - case PIPE_FORMAT_Z32_UNORM: - case PIPE_FORMAT_Z32_FLOAT: - case PIPE_FORMAT_R64_FLOAT: - case PIPE_FORMAT_R64G64_FLOAT: - case PIPE_FORMAT_R64G64B64_FLOAT: - case PIPE_FORMAT_R64G64B64A64_FLOAT: - case PIPE_FORMAT_R32_UNORM: - case PIPE_FORMAT_R32G32_UNORM: - case PIPE_FORMAT_R32G32B32_UNORM: - case PIPE_FORMAT_R32G32B32A32_UNORM: - case PIPE_FORMAT_R32_USCALED: - case PIPE_FORMAT_R32G32_USCALED: - case PIPE_FORMAT_R32G32B32_USCALED: - case PIPE_FORMAT_R32G32B32A32_USCALED: - case PIPE_FORMAT_R32_SNORM: - case PIPE_FORMAT_R32G32_SNORM: - case PIPE_FORMAT_R32G32B32_SNORM: - case PIPE_FORMAT_R32G32B32A32_SNORM: - case PIPE_FORMAT_R32_SSCALED: - case PIPE_FORMAT_R32G32_SSCALED: - case PIPE_FORMAT_R32G32B32_SSCALED: - case PIPE_FORMAT_R32G32B32A32_SSCALED: - case PIPE_FORMAT_R16_UNORM: - case PIPE_FORMAT_R16G16_UNORM: - case PIPE_FORMAT_R16G16B16_UNORM: - case PIPE_FORMAT_R16G16B16A16_UNORM: - case PIPE_FORMAT_R16_USCALED: - case PIPE_FORMAT_R16G16_USCALED: - case PIPE_FORMAT_R16G16B16_USCALED: - case PIPE_FORMAT_R16G16B16A16_USCALED: - case PIPE_FORMAT_R16_SNORM: - case PIPE_FORMAT_R16G16_SNORM: - case PIPE_FORMAT_R16G16B16_SNORM: - case PIPE_FORMAT_R16G16B16A16_SNORM: - case PIPE_FORMAT_R16_SSCALED: - case PIPE_FORMAT_R16G16_SSCALED: - case PIPE_FORMAT_R16G16B16_SSCALED: - case PIPE_FORMAT_R16G16B16A16_SSCALED: - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R8G8_UNORM: - case PIPE_FORMAT_R8G8B8_UNORM: - case PIPE_FORMAT_R8_USCALED: - case PIPE_FORMAT_R8G8_USCALED: - case PIPE_FORMAT_R8G8B8_USCALED: - case PIPE_FORMAT_R8_SNORM: - case PIPE_FORMAT_R8G8_SNORM: - case PIPE_FORMAT_R8G8B8_SNORM: - case PIPE_FORMAT_R8_SSCALED: - case PIPE_FORMAT_R8G8_SSCALED: - case PIPE_FORMAT_R8G8B8_SSCALED: - case PIPE_FORMAT_R32_FIXED: - case PIPE_FORMAT_R32G32_FIXED: - case PIPE_FORMAT_R32G32B32_FIXED: - case PIPE_FORMAT_R32G32B32A32_FIXED: - default: - R600_ERR("unsupported %d\n", pformat); - return -EINVAL; - } -} - int r600_conv_pipe_prim(unsigned pprim, unsigned *prim) { switch (pprim) { diff --git a/src/gallium/drivers/r600/r600_screen.c b/src/gallium/drivers/r600/r600_screen.c index 68615ca162..4b87327a7c 100644 --- a/src/gallium/drivers/r600/r600_screen.c +++ b/src/gallium/drivers/r600/r600_screen.c @@ -32,6 +32,7 @@ #include "r600_context.h" #include "r600_public.h" #include "r600_resource.h" +#include "r600_state_inlines.h" static const char* r600_get_vendor(struct pipe_screen* pscreen) { @@ -133,50 +134,51 @@ static boolean r600_is_format_supported(struct pipe_screen* screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned bindings, + unsigned usage, unsigned geom_flags) { + unsigned retval = 0; if (target >= PIPE_MAX_TEXTURE_TYPES) { R600_ERR("r600: unsupported texture type %d\n", target); return FALSE; } - switch (format) { - case PIPE_FORMAT_B4G4R4A4_UNORM: - case PIPE_FORMAT_B5G6R5_UNORM: - case PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_A8R8G8B8_SRGB: - case PIPE_FORMAT_R8G8B8A8_SRGB: - case PIPE_FORMAT_DXT1_RGB: - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - case PIPE_FORMAT_UYVY: - case PIPE_FORMAT_L8_SRGB: - case PIPE_FORMAT_L8A8_SRGB: - case PIPE_FORMAT_L8A8_UNORM: - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_X8R8G8B8_UNORM: - case PIPE_FORMAT_R8G8B8A8_UNORM: - case PIPE_FORMAT_R8G8B8X8_UNORM: - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_A8B8G8R8_SRGB: - case PIPE_FORMAT_B8G8R8A8_SRGB: - case PIPE_FORMAT_I8_UNORM: - case PIPE_FORMAT_Z16_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - case PIPE_FORMAT_Z32_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - case PIPE_FORMAT_Z24X8_UNORM: - return TRUE; - default: - /* Unknown format... */ - break; + + /* Multisample */ + if (sample_count > 1) + return FALSE; + + if ((usage & PIPE_BIND_SAMPLER_VIEW) && + r600_is_sampler_format_supported(format)) { + retval |= PIPE_BIND_SAMPLER_VIEW; + } + + if ((usage & (PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED)) && + r600_is_colorbuffer_format_supported(format)) { + retval |= usage & + (PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED); } - return FALSE; + + if ((usage & PIPE_BIND_DEPTH_STENCIL) && + r600_is_zs_format_supported(format)) { + retval |= PIPE_BIND_DEPTH_STENCIL; + } + + if ((usage & PIPE_BIND_VERTEX_BUFFER) && + r600_is_vertex_format_supported(format)) + retval |= PIPE_BIND_VERTEX_BUFFER; + + if (usage & PIPE_BIND_TRANSFER_READ) + retval |= PIPE_BIND_TRANSFER_READ; + if (usage & PIPE_BIND_TRANSFER_WRITE) + retval |= PIPE_BIND_TRANSFER_WRITE; + + return retval == usage; } static void r600_destroy_screen(struct pipe_screen* pscreen) diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 926a19cc6f..b8d50452e6 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -668,6 +668,8 @@ static int r600_cb0(struct r600_context *rctx, struct radeon_state *rstate) const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer; unsigned level = state->cbufs[0]->level; unsigned pitch, slice; + unsigned color_info; + unsigned format, swap; int r; r = radeon_state_init(rstate, rscreen->rw, R600_CB0_TYPE, R600_CB0); @@ -685,8 +687,16 @@ static int r600_cb0(struct r600_context *rctx, struct radeon_state *rstate) rstate->nbo = 3; pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[0]->height / 64 - 1; + + format = r600_translate_colorformat(rtex->resource.base.b.format); + swap = r600_translate_colorswap(rtex->resource.base.b.format); + color_info = S_0280A0_FORMAT(format) | + S_0280A0_COMP_SWAP(swap) | + S_0280A0_BLEND_CLAMP(1) | + S_0280A0_SOURCE_FORMAT(1); + rstate->states[R600_CB0__CB_COLOR0_BASE] = 0x00000000; - rstate->states[R600_CB0__CB_COLOR0_INFO] = 0x08110068; + rstate->states[R600_CB0__CB_COLOR0_INFO] = color_info; rstate->states[R600_CB0__CB_COLOR0_SIZE] = S_028060_PITCH_TILE_MAX(pitch) | S_028060_SLICE_TILE_MAX(slice); rstate->states[R600_CB0__CB_COLOR0_VIEW] = 0x00000000; @@ -1103,7 +1113,8 @@ static int r600_resource(struct r600_context *rctx, struct radeon_state *rstate, r = radeon_state_init(rstate, rscreen->rw, R600_PS_RESOURCE_TYPE, id); if (r) return r; - if (r600_conv_pipe_format(view->texture->format, &format)) + format = r600_translate_colorformat(view->texture->format); + if (format == ~0) return -EINVAL; desc = util_format_description(view->texture->format); if (desc == NULL) { diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 321e75d7a1..b45089dcc1 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -23,6 +23,9 @@ #ifndef R600_STATE_INLINES_H #define R600_STATE_INLINES_H +#include "util/u_format.h" +#include "r600d.h" + static INLINE uint32_t r600_translate_blend_function(int blend_func) { switch (blend_func) { @@ -126,4 +129,154 @@ static INLINE uint32_t r600_translate_ds_func(int func) return func; } +static uint32_t r600_translate_colorswap(enum pipe_format format) +{ + switch (format) { + /* 8-bit buffers. */ + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8_SNORM: + return SWAP_STD; + + /* 16-bit buffers. */ + case PIPE_FORMAT_B5G6R5_UNORM: + return SWAP_STD_REV; + + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B5G5R5X1_UNORM: + return SWAP_ALT; + + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B4G4R4X4_UNORM: + return SWAP_ALT; + /* 32-bit buffers. */ + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + return SWAP_ALT; + + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + return SWAP_ALT_REV; + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + return SWAP_STD; + + case PIPE_FORMAT_A8B8G8R8_UNORM: + case PIPE_FORMAT_X8B8G8R8_UNORM: +// case PIPE_FORMAT_R8SG8SB8UX8U_NORM: + return SWAP_STD_REV; + + case PIPE_FORMAT_R10G10B10A2_UNORM: + case PIPE_FORMAT_R10G10B10X2_SNORM: + case PIPE_FORMAT_B10G10R10A2_UNORM: + case PIPE_FORMAT_R10SG10SB10SA2U_NORM: + return SWAP_STD_REV; + + /* 64-bit buffers. */ + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: +// return V_0280A0_COLOR_16_16_16_16; + case PIPE_FORMAT_R16G16B16A16_FLOAT: +// return V_0280A0_COLOR_16_16_16_16_FLOAT; + + /* 128-bit buffers. */ + case PIPE_FORMAT_R32G32B32A32_FLOAT: +// return V_0280A0_COLOR_32_32_32_32_FLOAT; + return 0; + default: + R600_ERR("unsupported colorswap format %d\n", format); + return ~0; + } + return ~0; + +} + +static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) +{ + switch (format) { + /* 8-bit buffers. */ + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8_SNORM: + return V_0280A0_COLOR_8; + + /* 16-bit buffers. */ + case PIPE_FORMAT_B5G6R5_UNORM: + return V_0280A0_COLOR_5_6_5; + + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B5G5R5X1_UNORM: + return V_0280A0_COLOR_1_5_5_5; + + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B4G4R4X4_UNORM: + return V_0280A0_COLOR_4_4_4_4; + + /* 32-bit buffers. */ + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_A8B8G8R8_UNORM: + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_X8B8G8R8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + case PIPE_FORMAT_R8SG8SB8UX8U_NORM: + return V_0280A0_COLOR_8_8_8_8; + + case PIPE_FORMAT_R10G10B10A2_UNORM: + case PIPE_FORMAT_R10G10B10X2_SNORM: + case PIPE_FORMAT_B10G10R10A2_UNORM: + case PIPE_FORMAT_R10SG10SB10SA2U_NORM: + return V_0280A0_COLOR_10_10_10_2; + + /* 64-bit buffers. */ + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + return V_0280A0_COLOR_16_16_16_16; + case PIPE_FORMAT_R16G16B16A16_FLOAT: + return V_0280A0_COLOR_16_16_16_16_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: + return V_0280A0_COLOR_32_32_FLOAT; + + /* 128-bit buffers. */ + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return V_0280A0_COLOR_32_32_32_32_FLOAT; + + /* YUV buffers. */ + case PIPE_FORMAT_UYVY: +// return R300_COLOR_FORMAT_YVYU; + case PIPE_FORMAT_YUYV: +// return R300_COLOR_FORMAT_VYUY; + default: + return ~0; /* Unsupported. */ + } +} + +static INLINE boolean r600_is_sampler_format_supported(enum pipe_format format) +{ + return r600_translate_colorformat(format) != ~0; +} + +static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format) +{ + return r600_translate_colorformat(format) != ~0 && + r600_translate_colorswap(format) != ~0; +} + +static INLINE boolean r600_is_zs_format_supported(enum pipe_format format) +{ + return TRUE; +} + +static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format) +{ + return r600_translate_colorformat(format) != ~0; +} + #endif diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 8205bdeadc..2d0ede20fa 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -1169,4 +1169,9 @@ #define G_0286D4_PNT_SPRITE_TOP_1(x) (((x) >> 14) & 0x1) #define C_0286D4_PNT_SPRITE_TOP_1 0xFFFFBFFF +/* temporary swap */ +#define SWAP_STD 0 +#define SWAP_ALT 1 +#define SWAP_STD_REV 2 +#define SWAP_ALT_REV 3 #endif -- cgit v1.2.3 From b8de7788a4b20c702b06402e2e6eed60467e2522 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 6 Aug 2010 14:54:24 +1000 Subject: r600g: fix targetmask to work correctly. At least this seems to fix the glean maskedClear test. --- src/gallium/drivers/r600/r600_state.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index b8d50452e6..e43e4afe55 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1179,12 +1179,11 @@ static int r600_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate) } else color_control |= (0xcc << 16); - target_mask |= (pbs->rt[0].colormask); for (i = 0; i < 8; i++) { if (pbs->rt[i].blend_enable) { color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); - target_mask |= (pbs->rt[0].colormask << (4 * i)); } + target_mask |= (pbs->rt[i].colormask << (4 * i)); } r = radeon_state_init(rstate, rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); if (r) -- cgit v1.2.3 From fc47cb9d710c046d34e8238337e009d7b76a3207 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 6 Aug 2010 15:06:25 +1000 Subject: r600g: fixup z format translations. this enables GL_EXT_packed_depth_stencil. fbo-d24s8 passes --- src/gallium/drivers/r600/r600_state.c | 20 +------------------- src/gallium/drivers/r600/r600_state_inlines.h | 24 +++++++++++++++++++++++- 2 files changed, 24 insertions(+), 20 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index e43e4afe55..82145617ca 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -706,22 +706,6 @@ static int r600_cb0(struct r600_context *rctx, struct radeon_state *rstate) return radeon_state_pm4(rstate); } -static int r600_db_format(unsigned pformat, unsigned *format) -{ - switch (pformat) { - case PIPE_FORMAT_Z24X8_UNORM: - *format = V_028010_DEPTH_X8_24; - return 0; - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - *format = V_028010_DEPTH_8_24; - return 0; - default: - *format = V_028010_DEPTH_INVALID; - R600_ERR("unsupported %d\n", pformat); - return -EINVAL; - } -} - static int r600_db(struct r600_context *rctx, struct radeon_state *rstate) { struct r600_screen *rscreen = rctx->screen; @@ -746,9 +730,7 @@ static int r600_db(struct r600_context *rctx, struct radeon_state *rstate) level = state->zsbuf->level; pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; - if (r600_db_format(state->zsbuf->texture->format, &format)) { - return -EINVAL; - } + format = r600_translate_dbformat(state->zsbuf->texture->format); rstate->states[R600_DB__DB_DEPTH_BASE] = 0x00000000; rstate->states[R600_DB__DB_DEPTH_INFO] = 0x00010000 | S_028010_FORMAT(format); diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index b45089dcc1..4a955da1c0 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -129,6 +129,20 @@ static INLINE uint32_t r600_translate_ds_func(int func) return func; } +static uint32_t r600_translate_dbformat(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + return V_028010_DEPTH_16; + case PIPE_FORMAT_Z24X8_UNORM: + return V_028010_DEPTH_X8_24; + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + return V_028010_DEPTH_8_24; + default: + return ~0; + } +} + static uint32_t r600_translate_colorswap(enum pipe_format format) { switch (format) { @@ -168,6 +182,10 @@ static uint32_t r600_translate_colorswap(enum pipe_format format) // case PIPE_FORMAT_R8SG8SB8UX8U_NORM: return SWAP_STD_REV; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + return SWAP_STD; + case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_R10G10B10X2_SNORM: case PIPE_FORMAT_B10G10R10A2_UNORM: @@ -234,6 +252,10 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R10SG10SB10SA2U_NORM: return V_0280A0_COLOR_10_10_10_2; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + return V_0280A0_COLOR_24_8; + /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: @@ -271,7 +293,7 @@ static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format form static INLINE boolean r600_is_zs_format_supported(enum pipe_format format) { - return TRUE; + return r600_translate_dbformat(format) != ~0; } static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format) -- cgit v1.2.3 From 0a2a6c75bffc56d8dfde9b8a46c40222825630ea Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 6 Aug 2010 15:21:44 +1000 Subject: r600g: add SRGB support. This enables GL2.1 and passes glean's texture_srgb test. --- src/gallium/drivers/r600/r600_state.c | 17 +++++++++++++---- src/gallium/drivers/r600/r600_state_inlines.h | 8 ++++++++ src/gallium/drivers/r600/r600d.h | 2 ++ 3 files changed, 23 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 82145617ca..1a8ec48936 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -669,8 +669,9 @@ static int r600_cb0(struct r600_context *rctx, struct radeon_state *rstate) unsigned level = state->cbufs[0]->level; unsigned pitch, slice; unsigned color_info; - unsigned format, swap; + unsigned format, swap, ntype; int r; + const struct util_format_description *desc; r = radeon_state_init(rstate, rscreen->rw, R600_CB0_TYPE, R600_CB0); if (r) @@ -688,12 +689,19 @@ static int r600_cb0(struct r600_context *rctx, struct radeon_state *rstate) pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[0]->height / 64 - 1; + ntype = 0; + desc = util_format_description(rtex->resource.base.b.format); + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) + ntype = NUM_FORMAT_SRGB; + format = r600_translate_colorformat(rtex->resource.base.b.format); swap = r600_translate_colorswap(rtex->resource.base.b.format); + color_info = S_0280A0_FORMAT(format) | - S_0280A0_COMP_SWAP(swap) | - S_0280A0_BLEND_CLAMP(1) | - S_0280A0_SOURCE_FORMAT(1); + S_0280A0_COMP_SWAP(swap) | + S_0280A0_BLEND_CLAMP(1) | + S_0280A0_SOURCE_FORMAT(1) | + S_0280A0_NUMBER_TYPE(ntype); rstate->states[R600_CB0__CB_COLOR0_BASE] = 0x00000000; rstate->states[R600_CB0__CB_COLOR0_INFO] = color_info; @@ -1136,6 +1144,7 @@ static int r600_resource(struct r600_context *rctx, struct radeon_state *rstate, S_038010_DST_SEL_Y(r600_tex_swizzle(view->swizzle_g)) | S_038010_DST_SEL_Z(r600_tex_swizzle(view->swizzle_r)) | S_038010_DST_SEL_W(r600_tex_swizzle(view->swizzle_a)) | + S_038010_FORCE_DEGAMMA(desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ? 1 : 0) | S_038010_BASE_LEVEL(view->first_level); rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD5] = S_038014_LAST_LEVEL(view->last_level) | diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 4a955da1c0..fdc29386ae 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -166,6 +166,12 @@ static uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_B4G4R4X4_UNORM: return SWAP_ALT; /* 32-bit buffers. */ + + case PIPE_FORMAT_A8B8G8R8_SRGB: + return SWAP_STD_REV; + case PIPE_FORMAT_B8G8R8A8_SRGB: + return SWAP_ALT; + case PIPE_FORMAT_B8G8R8A8_UNORM: case PIPE_FORMAT_B8G8R8X8_UNORM: return SWAP_ALT; @@ -244,6 +250,8 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_X8B8G8R8_UNORM: case PIPE_FORMAT_R8G8B8X8_UNORM: case PIPE_FORMAT_R8SG8SB8UX8U_NORM: + case PIPE_FORMAT_A8B8G8R8_SRGB: + case PIPE_FORMAT_B8G8R8A8_SRGB: return V_0280A0_COLOR_8_8_8_8; case PIPE_FORMAT_R10G10B10A2_UNORM: diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 2d0ede20fa..f9cad93185 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -1174,4 +1174,6 @@ #define SWAP_ALT 1 #define SWAP_STD_REV 2 #define SWAP_ALT_REV 3 + +#define NUM_FORMAT_SRGB 6 #endif -- cgit v1.2.3 From 5f6ab5e259de826bb3795d90fdb0235c8997acb9 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 6 Aug 2010 15:29:50 +1000 Subject: r600g: start to fix up multiple targets. fixup exports from pixel shader for multi-cbs + depth buffer writing. Still crashes GPU running any of the multi-buffer or depth writing --- src/gallium/drivers/r600/r600_context.h | 2 +- src/gallium/drivers/r600/r600_shader.c | 18 +++++++++++-- src/gallium/drivers/r600/r600_state.c | 47 +++++++++++++++++++++++---------- 3 files changed, 50 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index 78da88fef5..c83949de42 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -94,7 +94,7 @@ struct r600_context_hw_states { struct radeon_state dsa; struct radeon_state blend; struct radeon_state viewport; - struct radeon_state cb0; + struct radeon_state cb[7]; struct radeon_state config; struct radeon_state cb_cntl; struct radeon_state db; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index f38aa7b463..d925dcbe4b 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -158,7 +158,7 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_shader *rshader = &rpshader->shader; struct radeon_state *state; - unsigned i, tmp; + unsigned i, tmp, exports_ps, num_cout; int r; r = radeon_state_init(&rpshader->rstate, rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER); @@ -174,11 +174,22 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta } state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp; } + + exports_ps = 0; + num_cout = 0; + for (i = 0; i < rshader->noutput; i++) { + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) + exports_ps |= 1; + else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { + exports_ps |= (1 << (num_cout+1)); + num_cout++; + } + } state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) | S_0286CC_PERSP_GRADIENT_ENA(1); state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr); - state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = 0x00000002; + state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps; rpshader->rstate.bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); rpshader->rstate.nbo = 1; rpshader->rstate.placement[0] = RADEON_GEM_DOMAIN_GTT; @@ -431,6 +442,9 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { output.array_base = 0; output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { + output.array_base = 61; + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else { R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); r = -EINVAL; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 1a8ec48936..c5e74d1efc 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -660,24 +660,25 @@ static int r600_blend(struct r600_context *rctx, struct radeon_state *rstate) return radeon_state_pm4(rstate); } -static int r600_cb0(struct r600_context *rctx, struct radeon_state *rstate) +static int r600_cb(struct r600_context *rctx, struct radeon_state *rstate, int cb) { struct r600_screen *rscreen = rctx->screen; struct r600_resource_texture *rtex; struct r600_resource *rbuffer; const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer; - unsigned level = state->cbufs[0]->level; + unsigned level = state->cbufs[cb]->level; unsigned pitch, slice; unsigned color_info; unsigned format, swap, ntype; int r; const struct util_format_description *desc; + int id = R600_CB0 + cb; - r = radeon_state_init(rstate, rscreen->rw, R600_CB0_TYPE, R600_CB0); + r = radeon_state_init(rstate, rscreen->rw, R600_CB0_TYPE, id); if (r) return r; - rtex = (struct r600_resource_texture*)state->cbufs[0]->texture; + rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; rbuffer = &rtex->resource; rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); rstate->bo[1] = radeon_bo_incref(rscreen->rw, rbuffer->bo); @@ -687,7 +688,7 @@ static int r600_cb0(struct r600_context *rctx, struct radeon_state *rstate) rstate->placement[4] = RADEON_GEM_DOMAIN_GTT; rstate->nbo = 3; pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[0]->height / 64 - 1; + slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1; ntype = 0; desc = util_format_description(rtex->resource.base.b.format); @@ -878,14 +879,20 @@ static int r600_dsa(struct r600_context *rctx, struct radeon_state *rstate) const struct pipe_depth_stencil_alpha_state *state = &rctx->dsa->state.dsa; const struct pipe_stencil_ref *stencil_ref = &rctx->stencil_ref->state.stencil_ref; struct r600_screen *rscreen = rctx->screen; - unsigned db_depth_control, alpha_test_control, alpha_ref; + unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control; unsigned stencil_ref_mask, stencil_ref_mask_bf; - int r; + int r, i; + struct r600_shader *rshader = &rctx->ps_shader->shader; r = radeon_state_init(rstate, rscreen->rw, R600_DSA_TYPE, R600_DSA); if (r) return r; + db_shader_control = 0x210; + for (i = 0; i < rshader->noutput; i++) { + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) + db_shader_control |= 1; + } stencil_ref_mask = 0; stencil_ref_mask_bf = 0; db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | @@ -933,7 +940,7 @@ static int r600_dsa(struct r600_context *rctx, struct radeon_state *rstate) rstate->states[R600_DSA__SPI_FOG_FUNC_BIAS] = 0x00000000; rstate->states[R600_DSA__SPI_FOG_CNTL] = 0x00000000; rstate->states[R600_DSA__DB_DEPTH_CONTROL] = db_depth_control; - rstate->states[R600_DSA__DB_SHADER_CONTROL] = 0x00000210; + rstate->states[R600_DSA__DB_SHADER_CONTROL] = db_shader_control; rstate->states[R600_DSA__DB_RENDER_CONTROL] = 0x00000060; rstate->states[R600_DSA__DB_RENDER_OVERRIDE] = 0x0000002A; rstate->states[R600_DSA__DB_SRESULTS_COMPARE_STATE1] = 0x00000000; @@ -1159,12 +1166,18 @@ static int r600_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate) { struct r600_screen *rscreen = rctx->screen; const struct pipe_blend_state *pbs = &rctx->blend->state.blend; - uint32_t color_control, target_mask; + int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs; + uint32_t color_control, target_mask, shader_mask; int i, r; target_mask = 0; + shader_mask = 0; color_control = S_028808_PER_MRT_BLEND(1); + for (i = 0; i < nr_cbufs; i++) { + shader_mask |= 0xf << i; + } + if (pbs->logicop_enable) { color_control |= (pbs->logicop_func) << 16; } else @@ -1175,11 +1188,13 @@ static int r600_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate) color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); } target_mask |= (pbs->rt[i].colormask << (4 * i)); + } r = radeon_state_init(rstate, rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); if (r) return r; - rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = 0x0000000F; + + rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = shader_mask; rstate->states[R600_CB_CNTL__CB_TARGET_MASK] = target_mask; rstate->states[R600_CB_CNTL__CB_COLOR_CONTROL] = color_control; rstate->states[R600_CB_CNTL__PA_SC_AA_CONFIG] = 0x00000000; @@ -1197,6 +1212,7 @@ int r600_context_hw_states(struct r600_context *rctx) { unsigned i; int r; + int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs; /* free previous TODO determine what need to be updated, what * doesn't @@ -1210,7 +1226,8 @@ int r600_context_hw_states(struct r600_context *rctx) r600_dsa(rctx, &rctx->hw_states.dsa); r600_blend(rctx, &rctx->hw_states.blend); r600_viewport(rctx, &rctx->hw_states.viewport); - r600_cb0(rctx, &rctx->hw_states.cb0); + for (i = 0; i < nr_cbufs; i++) + r600_cb(rctx, &rctx->hw_states.cb[i], i); r600_db(rctx, &rctx->hw_states.db); r600_cb_cntl(rctx, &rctx->hw_states.cb_cntl); @@ -1250,9 +1267,11 @@ int r600_context_hw_states(struct r600_context *rctx) r = radeon_draw_set(&rctx->draw, &rctx->hw_states.viewport); if (r) return r; - r = radeon_draw_set(&rctx->draw, &rctx->hw_states.cb0); - if (r) - return r; + for (i = 0; i < nr_cbufs; i++) { + r = radeon_draw_set(&rctx->draw, &rctx->hw_states.cb[i]); + if (r) + return r; + } r = radeon_draw_set(&rctx->draw, &rctx->hw_states.config); if (r) return r; -- cgit v1.2.3 From a6859f5cccbec9f01d1ceea7f8ba055b787ff299 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Thu, 5 Aug 2010 23:58:00 -0700 Subject: r300g: Remove unnecessary headers. --- src/gallium/drivers/r300/r300_hyperz.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index e719342a46..10e440ce30 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -27,8 +27,6 @@ #include "r300_hyperz.h" #include "r300_reg.h" #include "r300_fs.h" -#include "r300_emit.h" -#include "r300_texture.h" /* HiZ rules - taken from various docs -- cgit v1.2.3 From 6e6103004c9c737297b842a4aff298da920e7c33 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 6 Aug 2010 10:59:27 -0400 Subject: Revert "r600g: don't use dynamic state allocation for states" This reverts commit 9c949d4a4dd43b7889e13bdf683bcf211f049ced. Conflicts: src/gallium/drivers/r600/r600_context.h src/gallium/drivers/r600/r600_draw.c src/gallium/drivers/r600/r600_shader.c src/gallium/drivers/r600/r600_state.c --- src/gallium/drivers/r600/r600_context.c | 141 +++++++------- src/gallium/drivers/r600/r600_context.h | 29 ++- src/gallium/drivers/r600/r600_draw.c | 104 +++++------ src/gallium/drivers/r600/r600_shader.c | 34 ++-- src/gallium/drivers/r600/r600_state.c | 283 ++++++++++++++++++----------- src/gallium/drivers/r600/radeon.h | 52 ++---- src/gallium/targets/dri-r600/Makefile | 4 +- src/gallium/winsys/r600/drm/radeon.c | 11 ++ src/gallium/winsys/r600/drm/radeon_ctx.c | 160 +++++++++++----- src/gallium/winsys/r600/drm/radeon_draw.c | 92 +++++++++- src/gallium/winsys/r600/drm/radeon_priv.h | 29 +++ src/gallium/winsys/r600/drm/radeon_state.c | 70 ++++++- 12 files changed, 647 insertions(+), 362 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index f7732d8952..ae1780a1d4 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -48,14 +48,18 @@ void r600_flush(struct pipe_context *ctx, unsigned flags, struct r600_screen *rscreen = rctx->screen; static int dc = 0; - if (radeon_ctx_pm4(&rctx->ctx)) + if (radeon_ctx_pm4(rctx->ctx)) return; /* FIXME dumping should be removed once shader support instructions * without throwing bad code */ if (!dc) - radeon_ctx_dump_bof(&rctx->ctx, "gallium.bof"); - radeon_ctx_submit(&rctx->ctx); + radeon_ctx_dump_bof(rctx->ctx, "gallium.bof"); +#if 1 + radeon_ctx_submit(rctx->ctx); +#endif + rctx->ctx = radeon_ctx_decref(rctx->ctx); + rctx->ctx = radeon_ctx(rscreen->rw); dc++; } @@ -216,8 +220,9 @@ static void r600_init_config(struct r600_context *rctx) printf("num_gs_stack_entries : %d\n", num_gs_stack_entries); printf("num_es_stack_entries : %d\n", num_es_stack_entries); - radeon_state_init(&rctx->config, rctx->rw, R600_CONFIG_TYPE, R600_CONFIG); - rctx->config.states[R600_CONFIG__SQ_CONFIG] = 0x00000000; + rctx->hw_states.config = radeon_state(rctx->rw, R600_CONFIG_TYPE, R600_CONFIG); + + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] = 0x00000000; switch (family) { case CHIP_RV610: case CHIP_RV620: @@ -226,75 +231,75 @@ static void r600_init_config(struct r600_context *rctx) case CHIP_RV710: break; default: - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1); break; } - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_DX9_CONSTS(1); - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ALU_INST_PREFER_VECTOR(1); - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio); - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio); - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio); - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_DX9_CONSTS(1); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ALU_INST_PREFER_VECTOR(1); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio); - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0; - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs); - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs); - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0; + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs); + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs); + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0; - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs); - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_es_gprs); + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0; + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs); + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_es_gprs); - rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0; - rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_PS_THREADS(num_ps_threads); - rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_VS_THREADS(num_vs_threads); - rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_GS_THREADS(num_gs_threads); - rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_ES_THREADS(num_es_threads); + rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0; + rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_PS_THREADS(num_ps_threads); + rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_VS_THREADS(num_vs_threads); + rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_GS_THREADS(num_gs_threads); + rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_ES_THREADS(num_es_threads); - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0; - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); + rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0; + rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); + rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0; - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); + rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0; + rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); + rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); - rctx->config.states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000; - rctx->config.states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002; - rctx->config.states[R600_CONFIG__VC_ENHANCE] = 0x00000000; - rctx->config.states[R600_CONFIG__DB_DEBUG] = 0x00000000; - rctx->config.states[R600_CONFIG__DB_WATERMARKS] = 0x00420204; - rctx->config.states[R600_CONFIG__SX_MISC] = 0x00000000; - rctx->config.states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001; - rctx->config.states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003; - rctx->config.states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GS_MODE] = 0x00000000; - rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000; - rctx->config.states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001; - rctx->config.states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000; - radeon_state_pm4(&rctx->config); + rctx->hw_states.config->states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000; + rctx->hw_states.config->states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002; + rctx->hw_states.config->states[R600_CONFIG__VC_ENHANCE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__DB_DEBUG] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__DB_WATERMARKS] = 0x00420204; + rctx->hw_states.config->states[R600_CONFIG__SX_MISC] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001; + rctx->hw_states.config->states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003; + rctx->hw_states.config->states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GS_MODE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000; + rctx->hw_states.config->states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001; + rctx->hw_states.config->states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000; + radeon_state_pm4(rctx->hw_states.config); } struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) @@ -328,7 +333,7 @@ struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) r600_init_config(rctx); - radeon_ctx_init(&rctx->ctx, rscreen->rw); - radeon_draw_init(&rctx->draw, rscreen->rw); + rctx->ctx = radeon_ctx(rscreen->rw); + rctx->draw = radeon_draw(rscreen->rw); return &rctx->context; } diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index c83949de42..431f8951b2 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -76,7 +76,7 @@ struct r600_context_state { union pipe_states state; unsigned refcount; unsigned type; - struct radeon_state rstate; + struct radeon_state *rstate; struct r600_shader shader; struct radeon_bo *bo; }; @@ -89,28 +89,28 @@ struct r600_vertex_element }; struct r600_context_hw_states { - struct radeon_state rasterizer; - struct radeon_state scissor; - struct radeon_state dsa; - struct radeon_state blend; - struct radeon_state viewport; - struct radeon_state cb[7]; - struct radeon_state config; - struct radeon_state cb_cntl; - struct radeon_state db; + struct radeon_state *rasterizer; + struct radeon_state *scissor; + struct radeon_state *dsa; + struct radeon_state *blend; + struct radeon_state *viewport; + struct radeon_state *cb[7]; + struct radeon_state *config; + struct radeon_state *cb_cntl; + struct radeon_state *db; unsigned ps_nresource; unsigned ps_nsampler; - struct radeon_state ps_resource[160]; - struct radeon_state ps_sampler[16]; + struct radeon_state *ps_resource[160]; + struct radeon_state *ps_sampler[16]; }; struct r600_context { struct pipe_context context; struct r600_screen *screen; struct radeon *rw; - struct radeon_ctx ctx; + struct radeon_ctx *ctx; struct blitter_context *blitter; - struct radeon_draw draw; + struct radeon_draw *draw; /* hw states */ struct r600_context_hw_states hw_states; /* pipe states */ @@ -120,7 +120,6 @@ struct r600_context { unsigned ps_nsampler_view; unsigned vs_nsampler_view; unsigned nvertex_buffer; - struct radeon_state config; struct r600_context_state *rasterizer; struct r600_context_state *poly_stipple; struct r600_context_state *scissor; diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c index 3a54cee2d9..2420b76318 100644 --- a/src/gallium/drivers/r600/r600_draw.c +++ b/src/gallium/drivers/r600/r600_draw.c @@ -38,8 +38,8 @@ struct r600_draw { struct pipe_context *ctx; - struct radeon_state draw; - struct radeon_state vgt; + struct radeon_state *draw; + struct radeon_state *vgt; unsigned mode; unsigned start; unsigned count; @@ -51,7 +51,7 @@ static int r600_draw_common(struct r600_draw *draw) { struct r600_context *rctx = r600_context(draw->ctx); struct r600_screen *rscreen = rctx->screen; - struct radeon_state vs_resource; + struct radeon_state *vs_resource; struct r600_resource *rbuffer; unsigned i, j, offset, format, prim; u32 vgt_dma_index_type, vgt_draw_initiator; @@ -88,10 +88,10 @@ static int r600_draw_common(struct r600_draw *draw) r = r600_pipe_shader_update(draw->ctx, rctx->ps_shader); if (r) return r; - r = radeon_draw_set(&rctx->draw, &rctx->vs_shader->rstate); + r = radeon_draw_set(rctx->draw, rctx->vs_shader->rstate); if (r) return r; - r = radeon_draw_set(&rctx->draw, &rctx->ps_shader->rstate); + r = radeon_draw_set(rctx->draw, rctx->ps_shader->rstate); if (r) return r; @@ -101,68 +101,68 @@ static int r600_draw_common(struct r600_draw *draw) rbuffer = (struct r600_resource*)vertex_buffer->buffer; offset = rctx->vertex_elements->elements[i].src_offset + vertex_buffer->buffer_offset; format = r600_translate_colorformat(rctx->vertex_elements->elements[i].src_format); - r = radeon_state_init(&vs_resource, rscreen->rw, R600_VS_RESOURCE_TYPE, R600_VS_RESOURCE + i); - if (r) - return r; - vs_resource.bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); - vs_resource.nbo = 1; - vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD0] = offset; - vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD1] = rbuffer->bo->size - offset; - vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD2] = S_038008_STRIDE(vertex_buffer->stride) | + vs_resource = radeon_state(rscreen->rw, R600_VS_RESOURCE_TYPE, R600_VS_RESOURCE + i); + if (vs_resource == NULL) + return -ENOMEM; + vs_resource->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + vs_resource->nbo = 1; + vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = offset; + vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = rbuffer->bo->size - offset; + vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = S_038008_STRIDE(vertex_buffer->stride) | S_038008_DATA_FORMAT(format); - vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD3] = 0x00000000; - vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD4] = 0x00000000; - vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD5] = 0x00000000; - vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD6] = 0xC0000000; - vs_resource.placement[0] = RADEON_GEM_DOMAIN_GTT; - vs_resource.placement[1] = RADEON_GEM_DOMAIN_GTT; - radeon_state_pm4(&vs_resource); - r = radeon_draw_set(&rctx->draw, &vs_resource); + vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = 0x00000000; + vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD4] = 0x00000000; + vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD5] = 0x00000000; + vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = 0xC0000000; + vs_resource->placement[0] = RADEON_GEM_DOMAIN_GTT; + vs_resource->placement[1] = RADEON_GEM_DOMAIN_GTT; + r = radeon_draw_set_new(rctx->draw, vs_resource); if (r) return r; } /* FIXME start need to change winsys */ - r = radeon_state_init(&draw->draw, rscreen->rw, R600_DRAW_TYPE, R600_DRAW); - if (r) - return r; - draw->draw.states[R600_DRAW__VGT_NUM_INDICES] = draw->count; - draw->draw.states[R600_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator; + draw->draw = radeon_state(rscreen->rw, R600_DRAW_TYPE, R600_DRAW); + if (draw->draw == NULL) + return -ENOMEM; + draw->draw->states[R600_DRAW__VGT_NUM_INDICES] = draw->count; + draw->draw->states[R600_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator; if (draw->index_buffer) { - rbuffer = (struct r600_resource*)draw->index_buffer; - draw->draw.bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); - draw->draw.placement[0] = RADEON_GEM_DOMAIN_GTT; - draw->draw.placement[1] = RADEON_GEM_DOMAIN_GTT; - draw->draw.nbo = 1; + rbuffer = (struct r600_buffer*)draw->index_buffer; + draw->draw->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + draw->draw->placement[0] = RADEON_GEM_DOMAIN_GTT; + draw->draw->placement[1] = RADEON_GEM_DOMAIN_GTT; + draw->draw->nbo = 1; } - radeon_state_pm4(&draw->draw); - r = radeon_draw_set(&rctx->draw, &draw->draw); + r = radeon_draw_set_new(rctx->draw, draw->draw); if (r) return r; - r = radeon_state_init(&draw->vgt, rscreen->rw, R600_VGT_TYPE, R600_VGT); - if (r) - return r; - draw->vgt.states[R600_VGT__VGT_PRIMITIVE_TYPE] = prim; - draw->vgt.states[R600_VGT__VGT_MAX_VTX_INDX] = 0x00FFFFFF; - draw->vgt.states[R600_VGT__VGT_MIN_VTX_INDX] = 0x00000000; - draw->vgt.states[R600_VGT__VGT_INDX_OFFSET] = draw->start; - draw->vgt.states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX] = 0x00000000; - draw->vgt.states[R600_VGT__VGT_DMA_INDEX_TYPE] = vgt_dma_index_type; - draw->vgt.states[R600_VGT__VGT_PRIMITIVEID_EN] = 0x00000000; - draw->vgt.states[R600_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001; - draw->vgt.states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_EN] = 0x00000000; - draw->vgt.states[R600_VGT__VGT_INSTANCE_STEP_RATE_0] = 0x00000000; - draw->vgt.states[R600_VGT__VGT_INSTANCE_STEP_RATE_1] = 0x00000000; - radeon_state_pm4(&draw->vgt); - r = radeon_draw_set(&rctx->draw, &draw->vgt); + draw->vgt = radeon_state(rscreen->rw, R600_VGT_TYPE, R600_VGT); + if (draw->vgt == NULL) + return -ENOMEM; + draw->vgt->states[R600_VGT__VGT_PRIMITIVE_TYPE] = prim; + draw->vgt->states[R600_VGT__VGT_MAX_VTX_INDX] = 0x00FFFFFF; + draw->vgt->states[R600_VGT__VGT_MIN_VTX_INDX] = 0x00000000; + draw->vgt->states[R600_VGT__VGT_INDX_OFFSET] = draw->start; + draw->vgt->states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX] = 0x00000000; + draw->vgt->states[R600_VGT__VGT_DMA_INDEX_TYPE] = vgt_dma_index_type; + draw->vgt->states[R600_VGT__VGT_PRIMITIVEID_EN] = 0x00000000; + draw->vgt->states[R600_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001; + draw->vgt->states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_EN] = 0x00000000; + draw->vgt->states[R600_VGT__VGT_INSTANCE_STEP_RATE_0] = 0x00000000; + draw->vgt->states[R600_VGT__VGT_INSTANCE_STEP_RATE_1] = 0x00000000; + r = radeon_draw_set_new(rctx->draw, draw->vgt); if (r) return r; /* FIXME */ - r = radeon_ctx_set_draw(&rctx->ctx, &rctx->draw); + r = radeon_ctx_set_draw_new(rctx->ctx, rctx->draw); if (r == -EBUSY) { r600_flush(draw->ctx, 0, NULL); - r = radeon_ctx_set_draw(&rctx->ctx, &rctx->draw); + r = radeon_ctx_set_draw_new(rctx->ctx, rctx->draw); } - return r; + if (r) + return r; + rctx->draw = radeon_draw_duplicate(rctx->draw); + return 0; } void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index d925dcbe4b..dc8d4cb315 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -130,12 +130,11 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta struct r600_shader *rshader = &rpshader->shader; struct radeon_state *state; unsigned i, tmp; - int r; - r = radeon_state_init(&rpshader->rstate, rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER); - if (r) - return r; - state = &rpshader->rstate; + rpshader->rstate = radeon_state_decref(rpshader->rstate); + state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER); + if (state == NULL) + return -ENOMEM; for (i = 0; i < 10; i++) { state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0; } @@ -146,10 +145,11 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta } state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr); - rpshader->rstate.bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); - rpshader->rstate.bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo); - rpshader->rstate.nbo = 2; - rpshader->rstate.placement[0] = RADEON_GEM_DOMAIN_GTT; + rpshader->rstate = state; + rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); + rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo); + rpshader->rstate->nbo = 2; + rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; return radeon_state_pm4(state); } @@ -159,12 +159,11 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta struct r600_shader *rshader = &rpshader->shader; struct radeon_state *state; unsigned i, tmp, exports_ps, num_cout; - int r; - r = radeon_state_init(&rpshader->rstate, rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER); - if (r) - return r; - state = &rpshader->rstate; + rpshader->rstate = radeon_state_decref(rpshader->rstate); + state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER); + if (state == NULL) + return -ENOMEM; for (i = 0; i < rshader->ninput; i++) { tmp = S_028644_SEMANTIC(i); tmp |= S_028644_SEL_CENTROID(1); @@ -190,9 +189,10 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr); state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps; - rpshader->rstate.bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); - rpshader->rstate.nbo = 1; - rpshader->rstate.placement[0] = RADEON_GEM_DOMAIN_GTT; + rpshader->rstate = state; + rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); + rpshader->rstate->nbo = 1; + rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; return radeon_state_pm4(state); } diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index c5e74d1efc..e8871cd748 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -277,10 +277,9 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_context *rctx = r600_context(ctx); unsigned nconstant = 0, i, type, id; - struct radeon_state rstate; + struct radeon_state *rstate; struct pipe_transfer *transfer; u32 *ptr; - int r; switch (shader) { case PIPE_SHADER_VERTEX: @@ -301,16 +300,16 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, if (ptr == NULL) return; for (i = 0; i < nconstant; i++) { - r = radeon_state_init(&rstate, rscreen->rw, type, id + i); - if (r) + rstate = radeon_state(rscreen->rw, type, id + i); + if (rstate == NULL) return; - rstate.states[R600_PS_CONSTANT__SQ_ALU_CONSTANT0_0] = ptr[i * 4 + 0]; - rstate.states[R600_PS_CONSTANT__SQ_ALU_CONSTANT1_0] = ptr[i * 4 + 1]; - rstate.states[R600_PS_CONSTANT__SQ_ALU_CONSTANT2_0] = ptr[i * 4 + 2]; - rstate.states[R600_PS_CONSTANT__SQ_ALU_CONSTANT3_0] = ptr[i * 4 + 3]; - if (radeon_state_pm4(&rstate)) + rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT0_0] = ptr[i * 4 + 0]; + rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT1_0] = ptr[i * 4 + 1]; + rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT2_0] = ptr[i * 4 + 2]; + rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT3_0] = ptr[i * 4 + 3]; + if (radeon_state_pm4(rstate)) return; - if (radeon_draw_set(&rctx->draw, &rstate)) + if (radeon_draw_set_new(rctx->draw, rstate)) return; } pipe_buffer_unmap(ctx, buffer, transfer); @@ -521,6 +520,7 @@ struct r600_context_state *r600_context_state_decref(struct r600_context_state * R600_ERR("invalid type %d\n", rstate->type); return NULL; } + radeon_state_decref(rstate->rstate); FREE(rstate); return NULL; } @@ -603,17 +603,16 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne return rstate; } -static int r600_blend(struct r600_context *rctx, struct radeon_state *rstate) +static struct radeon_state *r600_blend(struct r600_context *rctx) { struct r600_screen *rscreen = rctx->screen; + struct radeon_state *rstate; const struct pipe_blend_state *state = &rctx->blend->state.blend; int i; - int r; - - r = radeon_state_init(rstate, rscreen->rw, R600_BLEND_TYPE, R600_BLEND); - if (r) - return r; + rstate = radeon_state(rscreen->rw, R600_BLEND_TYPE, R600_BLEND); + if (rstate == NULL) + return NULL; rstate->states[R600_BLEND__CB_BLEND_RED] = fui(rctx->blend_color.color[0]); rstate->states[R600_BLEND__CB_BLEND_GREEN] = fui(rctx->blend_color.color[1]); rstate->states[R600_BLEND__CB_BLEND_BLUE] = fui(rctx->blend_color.color[2]); @@ -657,27 +656,30 @@ static int r600_blend(struct r600_context *rctx, struct radeon_state *rstate) rstate->states[R600_BLEND__CB_BLEND_CONTROL] = bc; } - return radeon_state_pm4(rstate); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } -static int r600_cb(struct r600_context *rctx, struct radeon_state *rstate, int cb) +static struct radeon_state *r600_cb(struct r600_context *rctx, int cb) { struct r600_screen *rscreen = rctx->screen; struct r600_resource_texture *rtex; struct r600_resource *rbuffer; + struct radeon_state *rstate; const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer; unsigned level = state->cbufs[cb]->level; unsigned pitch, slice; unsigned color_info; unsigned format, swap, ntype; - int r; const struct util_format_description *desc; int id = R600_CB0 + cb; - r = radeon_state_init(rstate, rscreen->rw, R600_CB0_TYPE, id); - if (r) - return r; - + rstate = radeon_state(rscreen->rw, R600_CB0_TYPE, id); + if (rstate == NULL) + return NULL; rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; rbuffer = &rtex->resource; rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); @@ -712,25 +714,30 @@ static int r600_cb(struct r600_context *rctx, struct radeon_state *rstate, int c rstate->states[R600_CB0__CB_COLOR0_FRAG] = 0x00000000; rstate->states[R600_CB0__CB_COLOR0_TILE] = 0x00000000; rstate->states[R600_CB0__CB_COLOR0_MASK] = 0x00000000; - return radeon_state_pm4(rstate); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } -static int r600_db(struct r600_context *rctx, struct radeon_state *rstate) +static struct radeon_state *r600_db(struct r600_context *rctx) { struct r600_screen *rscreen = rctx->screen; struct r600_resource_texture *rtex; struct r600_resource *rbuffer; + struct radeon_state *rstate; const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer; unsigned level = state->cbufs[0]->level; unsigned pitch, slice, format; - int r; - - r = radeon_state_init(rstate, rscreen->rw, R600_DB_TYPE, R600_DB); - if (r) - return r; if (state->zsbuf == NULL) - return 0; + return NULL; + + rstate = radeon_state(rscreen->rw, R600_DB_TYPE, R600_DB); + if (rstate == NULL) + return NULL; + rtex = (struct r600_resource_texture*)state->zsbuf->texture; rbuffer = &rtex->resource; rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); @@ -747,22 +754,23 @@ static int r600_db(struct r600_context *rctx, struct radeon_state *rstate) rstate->states[R600_DB__DB_PREFETCH_LIMIT] = (state->zsbuf->height / 8) -1; rstate->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice); - return radeon_state_pm4(rstate); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } -static int r600_rasterizer(struct r600_context *rctx, struct radeon_state *rstate) +static struct radeon_state *r600_rasterizer(struct r600_context *rctx) { const struct pipe_rasterizer_state *state = &rctx->rasterizer->state.rasterizer; const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; struct r600_screen *rscreen = rctx->screen; + struct radeon_state *rstate; float offset_units = 0, offset_scale = 0; char depth = 0; unsigned offset_db_fmt_cntl = 0; - int r; - r = radeon_state_init(rstate, rscreen->rw, R600_RASTERIZER_TYPE, R600_RASTERIZER); - if (r) - return r; if (fb->zsbuf) { offset_units = state->offset_units; offset_scale = state->offset_scale * 12.0f; @@ -783,12 +791,15 @@ static int r600_rasterizer(struct r600_context *rctx, struct radeon_state *rstat break; default: R600_ERR("unsupported %d\n", fb->zsbuf->texture->format); - return -EINVAL; + return NULL; } } offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth); rctx->flat_shade = state->flatshade; + rstate = radeon_state(rscreen->rw, R600_RASTERIZER_TYPE, R600_RASTERIZER); + if (rstate == NULL) + return NULL; rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000001; rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0x00000000; rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = 0x00080000 | @@ -816,21 +827,25 @@ static int r600_rasterizer(struct r600_context *rctx, struct radeon_state *rstat rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET] = fui(offset_units); rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE] = fui(offset_scale); rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET] = fui(offset_units); - return radeon_state_pm4(rstate); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } -static int r600_scissor(struct r600_context *rctx, struct radeon_state *rstate) +static struct radeon_state *r600_scissor(struct r600_context *rctx) { const struct pipe_scissor_state *state = &rctx->scissor->state.scissor; struct r600_screen *rscreen = rctx->screen; + struct radeon_state *rstate; u32 tl, br; - int r; - r = radeon_state_init(rstate, rscreen->rw, R600_SCISSOR_TYPE, R600_SCISSOR); - if (r) - return r; tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) | S_028240_WINDOW_OFFSET_DISABLE(1); br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); + rstate = radeon_state(rscreen->rw, R600_SCISSOR_TYPE, R600_SCISSOR); + if (rstate == NULL) + return NULL; rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = tl; rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = br; rstate->states[R600_SCISSOR__PA_SC_WINDOW_OFFSET] = 0x00000000; @@ -850,18 +865,22 @@ static int r600_scissor(struct r600_context *rctx, struct radeon_state *rstate) rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = br; rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = tl; rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = br; - return radeon_state_pm4(rstate); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } -static int r600_viewport(struct r600_context *rctx, struct radeon_state *rstate) +static struct radeon_state *r600_viewport(struct r600_context *rctx) { const struct pipe_viewport_state *state = &rctx->viewport->state.viewport; struct r600_screen *rscreen = rctx->screen; - int r; + struct radeon_state *rstate; - r = radeon_state_init(rstate, rscreen->rw, R600_VIEWPORT_TYPE, R600_VIEWPORT); - if (r) - return r; + rstate = radeon_state(rscreen->rw, R600_VIEWPORT_TYPE, R600_VIEWPORT); + if (rstate == NULL) + return NULL; rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMIN_0] = 0x00000000; rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui(state->scale[0]); @@ -871,22 +890,27 @@ static int r600_viewport(struct r600_context *rctx, struct radeon_state *rstate) rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui(state->translate[1]); rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = fui(state->translate[2]); rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; - return radeon_state_pm4(rstate); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } -static int r600_dsa(struct r600_context *rctx, struct radeon_state *rstate) +static struct radeon_state *r600_dsa(struct r600_context *rctx) { const struct pipe_depth_stencil_alpha_state *state = &rctx->dsa->state.dsa; const struct pipe_stencil_ref *stencil_ref = &rctx->stencil_ref->state.stencil_ref; struct r600_screen *rscreen = rctx->screen; unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control; unsigned stencil_ref_mask, stencil_ref_mask_bf; - int r, i; struct r600_shader *rshader = &rctx->ps_shader->shader; + struct radeon_state *rstate; + int i; - r = radeon_state_init(rstate, rscreen->rw, R600_DSA_TYPE, R600_DSA); - if (r) - return r; + rstate = radeon_state(rscreen->rw, R600_DSA_TYPE, R600_DSA); + if (rstate == NULL) + return NULL; db_shader_control = 0x210; for (i = 0; i < rshader->noutput; i++) { @@ -898,8 +922,8 @@ static int r600_dsa(struct r600_context *rctx, struct radeon_state *rstate) db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | S_028800_Z_WRITE_ENABLE(state->depth.writemask) | S_028800_ZFUNC(state->depth.func); - /* set stencil enable */ + if (state->stencil[0].enabled) { db_depth_control |= S_028800_STENCIL_ENABLE(1); db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)); @@ -946,7 +970,11 @@ static int r600_dsa(struct r600_context *rctx, struct radeon_state *rstate) rstate->states[R600_DSA__DB_SRESULTS_COMPARE_STATE1] = 0x00000000; rstate->states[R600_DSA__DB_PRELOAD_CONTROL] = 0x00000000; rstate->states[R600_DSA__DB_ALPHA_TO_MASK] = 0x0000AA00; - return radeon_state_pm4(rstate); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } static inline unsigned r600_tex_wrap(unsigned wrap) @@ -1024,15 +1052,16 @@ static INLINE u32 S_FIXED(float value, u32 frac_bits) return value * (1 << frac_bits); } -static int r600_sampler(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_sampler_state *state, unsigned id) +static struct radeon_state *r600_sampler(struct r600_context *rctx, + const struct pipe_sampler_state *state, + unsigned id) { struct r600_screen *rscreen = rctx->screen; - int r; + struct radeon_state *rstate; - r = radeon_state_init(rstate, rscreen->rw, R600_PS_SAMPLER_TYPE, id); - if (r) - return r; + rstate = radeon_state(rscreen->rw, R600_PS_SAMPLER_TYPE, id); + if (rstate == NULL) + return NULL; rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0] = S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | @@ -1047,7 +1076,11 @@ static int r600_sampler(struct r600_context *rctx, struct radeon_state *rstate, S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)); rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0] = S_03C008_TYPE(1); - return radeon_state_pm4(rstate); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } static inline unsigned r600_tex_swizzle(unsigned swizzle) @@ -1097,26 +1130,28 @@ static inline unsigned r600_tex_dim(unsigned dim) } } -static int r600_resource(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_sampler_view *view, unsigned id) +static struct radeon_state *r600_resource(struct r600_context *rctx, + const struct pipe_sampler_view *view, + unsigned id) { struct r600_screen *rscreen = rctx->screen; const struct util_format_description *desc; struct r600_resource_texture *tmp; struct r600_resource *rbuffer; + struct radeon_state *rstate; unsigned format; - int r; - r = radeon_state_init(rstate, rscreen->rw, R600_PS_RESOURCE_TYPE, id); - if (r) - return r; format = r600_translate_colorformat(view->texture->format); if (format == ~0) - return -EINVAL; + return NULL; desc = util_format_description(view->texture->format); if (desc == NULL) { R600_ERR("unknow format %d\n", view->texture->format); - return -EINVAL; + return NULL; + } + rstate = radeon_state(rscreen->rw, R600_PS_RESOURCE_TYPE, id); + if (rstate == NULL) { + return NULL; } tmp = (struct r600_resource_texture*)view->texture; rbuffer = &tmp->resource; @@ -1159,16 +1194,21 @@ static int r600_resource(struct r600_context *rctx, struct radeon_state *rstate, S_038014_LAST_ARRAY(0); rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE); - return radeon_state_pm4(rstate); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } -static int r600_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate) +static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) { struct r600_screen *rscreen = rctx->screen; + struct radeon_state *rstate; const struct pipe_blend_state *pbs = &rctx->blend->state.blend; int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs; uint32_t color_control, target_mask, shader_mask; - int i, r; + int i; target_mask = 0; shader_mask = 0; @@ -1190,10 +1230,7 @@ static int r600_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate) target_mask |= (pbs->rt[i].colormask << (4 * i)); } - r = radeon_state_init(rstate, rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); - if (r) - return r; - + rstate = radeon_state(rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = shader_mask; rstate->states[R600_CB_CNTL__CB_TARGET_MASK] = target_mask; rstate->states[R600_CB_CNTL__CB_COLOR_CONTROL] = color_control; @@ -1205,7 +1242,11 @@ static int r600_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate) rstate->states[R600_CB_CNTL__CB_CLRCMP_DST] = 0x000000FF; rstate->states[R600_CB_CNTL__CB_CLRCMP_MSK] = 0xFFFFFFFF; rstate->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF; - return radeon_state_pm4(rstate); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } int r600_context_hw_states(struct r600_context *rctx) @@ -1217,77 +1258,97 @@ int r600_context_hw_states(struct r600_context *rctx) /* free previous TODO determine what need to be updated, what * doesn't */ - memset(&rctx->hw_states, 0, sizeof(struct r600_context_hw_states)); + //radeon_state_decref(rctx->hw_states.config); + rctx->hw_states.cb_cntl = radeon_state_decref(rctx->hw_states.cb_cntl); + rctx->hw_states.db = radeon_state_decref(rctx->hw_states.db); + rctx->hw_states.rasterizer = radeon_state_decref(rctx->hw_states.rasterizer); + rctx->hw_states.scissor = radeon_state_decref(rctx->hw_states.scissor); + rctx->hw_states.dsa = radeon_state_decref(rctx->hw_states.dsa); + rctx->hw_states.blend = radeon_state_decref(rctx->hw_states.blend); + rctx->hw_states.viewport = radeon_state_decref(rctx->hw_states.viewport); + for (i = 0; i < 8; i++) { + rctx->hw_states.cb[i] = radeon_state_decref(rctx->hw_states.cb[i]); + } + for (i = 0; i < rctx->hw_states.ps_nresource; i++) { + radeon_state_decref(rctx->hw_states.ps_resource[i]); + rctx->hw_states.ps_resource[i] = NULL; + } + rctx->hw_states.ps_nresource = 0; + for (i = 0; i < rctx->hw_states.ps_nsampler; i++) { + radeon_state_decref(rctx->hw_states.ps_sampler[i]); + rctx->hw_states.ps_sampler[i] = NULL; + } + rctx->hw_states.ps_nsampler = 0; /* build new states */ - rctx->hw_states.config = rctx->config; - r600_rasterizer(rctx, &rctx->hw_states.rasterizer); - r600_scissor(rctx, &rctx->hw_states.scissor); - r600_dsa(rctx, &rctx->hw_states.dsa); - r600_blend(rctx, &rctx->hw_states.blend); - r600_viewport(rctx, &rctx->hw_states.viewport); - for (i = 0; i < nr_cbufs; i++) - r600_cb(rctx, &rctx->hw_states.cb[i], i); - r600_db(rctx, &rctx->hw_states.db); - r600_cb_cntl(rctx, &rctx->hw_states.cb_cntl); + rctx->hw_states.rasterizer = r600_rasterizer(rctx); + rctx->hw_states.scissor = r600_scissor(rctx); + rctx->hw_states.dsa = r600_dsa(rctx); + rctx->hw_states.blend = r600_blend(rctx); + rctx->hw_states.viewport = r600_viewport(rctx); + for (i = 0; i < nr_cbufs; i++) { + rctx->hw_states.cb[i] = r600_cb(rctx, i); + } + rctx->hw_states.db = r600_db(rctx); + rctx->hw_states.cb_cntl = r600_cb_cntl(rctx); for (i = 0; i < rctx->ps_nsampler; i++) { if (rctx->ps_sampler[i]) { - r600_sampler(rctx, &rctx->hw_states.ps_sampler[i], - &rctx->ps_sampler[i]->state.sampler, - R600_PS_SAMPLER + i); + rctx->hw_states.ps_sampler[i] = r600_sampler(rctx, + &rctx->ps_sampler[i]->state.sampler, + R600_PS_SAMPLER + i); } } rctx->hw_states.ps_nsampler = rctx->ps_nsampler; for (i = 0; i < rctx->ps_nsampler_view; i++) { if (rctx->ps_sampler_view[i]) { - r600_resource(rctx, &rctx->hw_states.ps_resource[i], - &rctx->ps_sampler_view[i]->state.sampler_view, - R600_PS_RESOURCE + i); + rctx->hw_states.ps_resource[i] = r600_resource(rctx, + &rctx->ps_sampler_view[i]->state.sampler_view, + R600_PS_RESOURCE + i); } } rctx->hw_states.ps_nresource = rctx->ps_nsampler_view; /* bind states */ - r = radeon_draw_set(&rctx->draw, &rctx->hw_states.db); + r = radeon_draw_set(rctx->draw, rctx->hw_states.db); if (r) return r; - r = radeon_draw_set(&rctx->draw, &rctx->hw_states.rasterizer); + r = radeon_draw_set(rctx->draw, rctx->hw_states.rasterizer); if (r) return r; - r = radeon_draw_set(&rctx->draw, &rctx->hw_states.scissor); + r = radeon_draw_set(rctx->draw, rctx->hw_states.scissor); if (r) return r; - r = radeon_draw_set(&rctx->draw, &rctx->hw_states.dsa); + r = radeon_draw_set(rctx->draw, rctx->hw_states.dsa); if (r) return r; - r = radeon_draw_set(&rctx->draw, &rctx->hw_states.blend); + r = radeon_draw_set(rctx->draw, rctx->hw_states.blend); if (r) return r; - r = radeon_draw_set(&rctx->draw, &rctx->hw_states.viewport); + r = radeon_draw_set(rctx->draw, rctx->hw_states.viewport); if (r) return r; for (i = 0; i < nr_cbufs; i++) { - r = radeon_draw_set(&rctx->draw, &rctx->hw_states.cb[i]); + r = radeon_draw_set(rctx->draw, rctx->hw_states.cb[i]); if (r) return r; } - r = radeon_draw_set(&rctx->draw, &rctx->hw_states.config); + r = radeon_draw_set(rctx->draw, rctx->hw_states.config); if (r) return r; - r = radeon_draw_set(&rctx->draw, &rctx->hw_states.cb_cntl); + r = radeon_draw_set(rctx->draw, rctx->hw_states.cb_cntl); if (r) return r; for (i = 0; i < rctx->hw_states.ps_nresource; i++) { - if (rctx->hw_states.ps_resource[i].valid) { - r = radeon_draw_set(&rctx->draw, &rctx->hw_states.ps_resource[i]); + if (rctx->hw_states.ps_resource[i]) { + r = radeon_draw_set(rctx->draw, rctx->hw_states.ps_resource[i]); if (r) return r; } } for (i = 0; i < rctx->hw_states.ps_nsampler; i++) { - if (rctx->hw_states.ps_sampler[i].valid) { - r = radeon_draw_set(&rctx->draw, &rctx->hw_states.ps_sampler[i]); + if (rctx->hw_states.ps_sampler[i]) { + r = radeon_draw_set(rctx->draw, rctx->hw_states.ps_sampler[i]); if (r) return r; } diff --git a/src/gallium/drivers/r600/radeon.h b/src/gallium/drivers/r600/radeon.h index 709ef8a85a..3a8405f9b4 100644 --- a/src/gallium/drivers/r600/radeon.h +++ b/src/gallium/drivers/r600/radeon.h @@ -103,17 +103,17 @@ int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo); */ struct radeon_state { struct radeon *radeon; - unsigned valid; + unsigned refcount; unsigned type; unsigned id; unsigned nstates; - u32 states[64]; + u32 *states; unsigned npm4; unsigned cpm4; u32 pm4_crc; - u32 pm4[128]; + u32 *pm4; u32 nimmd; - u32 immd[64]; + u32 *immd; unsigned nbo; struct radeon_bo *bo[4]; unsigned nreloc; @@ -123,51 +123,35 @@ struct radeon_state { unsigned bo_dirty[4]; }; -int radeon_state_init(struct radeon_state *state, struct radeon *radeon, u32 type, u32 id); +struct radeon_state *radeon_state(struct radeon *radeon, u32 type, u32 id); +struct radeon_state *radeon_state_incref(struct radeon_state *state); +struct radeon_state *radeon_state_decref(struct radeon_state *state); int radeon_state_pm4(struct radeon_state *state); /* * draw functions */ struct radeon_draw { + unsigned refcount; struct radeon *radeon; unsigned nstate; - struct radeon_state state[1273]; + struct radeon_state **state; unsigned cpm4; }; -int radeon_draw_init(struct radeon_draw *draw, struct radeon *radeon); +struct radeon_draw *radeon_draw(struct radeon *radeon); +struct radeon_draw *radeon_draw_duplicate(struct radeon_draw *draw); +struct radeon_draw *radeon_draw_incref(struct radeon_draw *draw); +struct radeon_draw *radeon_draw_decref(struct radeon_draw *draw); int radeon_draw_set(struct radeon_draw *draw, struct radeon_state *state); +int radeon_draw_set_new(struct radeon_draw *draw, struct radeon_state *state); int radeon_draw_check(struct radeon_draw *draw); -/* - * Context - */ -#pragma pack(1) -struct radeon_cs_reloc { - uint32_t handle; - uint32_t read_domain; - uint32_t write_domain; - uint32_t flags; -}; -#pragma pack() - -struct radeon_ctx { - struct radeon *radeon; - u32 *pm4; - u32 cpm4; - u32 draw_cpm4; - unsigned id; - unsigned nreloc; - struct radeon_cs_reloc reloc[2048]; - unsigned nbo; - struct radeon_bo *bo[2048]; - unsigned ndraw; - struct radeon_draw draw[128]; -}; - -int radeon_ctx_init(struct radeon_ctx *ctx, struct radeon *radeon); +struct radeon_ctx *radeon_ctx(struct radeon *radeon); +struct radeon_ctx *radeon_ctx_decref(struct radeon_ctx *ctx); +struct radeon_ctx *radeon_ctx_incref(struct radeon_ctx *ctx); int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw); +int radeon_ctx_set_draw_new(struct radeon_ctx *ctx, struct radeon_draw *draw); int radeon_ctx_pm4(struct radeon_ctx *ctx); int radeon_ctx_submit(struct radeon_ctx *ctx); void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file); diff --git a/src/gallium/targets/dri-r600/Makefile b/src/gallium/targets/dri-r600/Makefile index 9c8b4ab252..932303d194 100644 --- a/src/gallium/targets/dri-r600/Makefile +++ b/src/gallium/targets/dri-r600/Makefile @@ -4,12 +4,12 @@ include $(TOP)/configs/current LIBNAME = r600_dri.so PIPE_DRIVERS = \ - $(TOP)/src/gallium/drivers/r600/libr600.a \ $(TOP)/src/gallium/state_trackers/dri/drm/libdridrm.a \ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ - $(TOP)/src/gallium/drivers/rbug/librbug.a + $(TOP)/src/gallium/drivers/rbug/librbug.a \ + $(TOP)/src/gallium/drivers/r600/libr600.a C_SOURCES = \ target.c \ diff --git a/src/gallium/winsys/r600/drm/radeon.c b/src/gallium/winsys/r600/drm/radeon.c index 24d821d5cf..7e65669806 100644 --- a/src/gallium/winsys/r600/drm/radeon.c +++ b/src/gallium/winsys/r600/drm/radeon.c @@ -43,6 +43,16 @@ static int radeon_get_device(struct radeon *radeon) return r; } +/* symbol missing drove me crazy hack to get symbol exported */ +static void fake(void) +{ + struct radeon_ctx *ctx; + struct radeon_draw *draw; + + ctx = radeon_ctx(NULL); + draw = radeon_draw(NULL); +} + struct radeon *radeon_new(int fd, unsigned device) { struct radeon *radeon; @@ -50,6 +60,7 @@ struct radeon *radeon_new(int fd, unsigned device) radeon = calloc(1, sizeof(*radeon)); if (radeon == NULL) { + fake(); return NULL; } radeon->fd = fd; diff --git a/src/gallium/winsys/r600/drm/radeon_ctx.c b/src/gallium/winsys/r600/drm/radeon_ctx.c index af270d5d20..6b0eba0b28 100644 --- a/src/gallium/winsys/r600/drm/radeon_ctx.c +++ b/src/gallium/winsys/r600/drm/radeon_ctx.c @@ -32,8 +32,13 @@ int radeon_ctx_set_bo_new(struct radeon_ctx *ctx, struct radeon_bo *bo) { - if (ctx->nbo >= 2048) - return -EBUSY; + void *ptr; + + ptr = realloc(ctx->bo, sizeof(struct radeon_bo) * (ctx->nbo + 1)); + if (ptr == NULL) { + return -ENOMEM; + } + ctx->bo = ptr; ctx->bo[ctx->nbo] = bo; ctx->nbo++; return 0; @@ -71,26 +76,49 @@ void radeon_ctx_get_placement(struct radeon_ctx *ctx, unsigned reloc, u32 *place } } -static void radeon_ctx_clear(struct radeon_ctx *ctx) +struct radeon_ctx *radeon_ctx(struct radeon *radeon) { - ctx->draw_cpm4 = 0; - ctx->cpm4 = 0; - ctx->ndraw = 0; - ctx->nbo = 0; - ctx->nreloc = 0; + struct radeon_ctx *ctx; + + if (radeon == NULL) + return NULL; + ctx = calloc(1, sizeof(*ctx)); + if (ctx == NULL) + return NULL; + ctx->radeon = radeon_incref(radeon); + return ctx; } -int radeon_ctx_init(struct radeon_ctx *ctx, struct radeon *radeon) +struct radeon_ctx *radeon_ctx_incref(struct radeon_ctx *ctx) { - memset(ctx, 0, sizeof(struct radeon_ctx)); - ctx->radeon = radeon_incref(radeon); - radeon_ctx_clear(ctx); + ctx->refcount++; + return ctx; +} + +struct radeon_ctx *radeon_ctx_decref(struct radeon_ctx *ctx) +{ + unsigned i; + + if (ctx == NULL) + return NULL; + if (--ctx->refcount > 0) { + return NULL; + } + + for (i = 0; i < ctx->ndraw; i++) { + ctx->draw[i] = radeon_draw_decref(ctx->draw[i]); + } + for (i = 0; i < ctx->nbo; i++) { + ctx->bo[i] = radeon_bo_decref(ctx->radeon, ctx->bo[i]); + } + ctx->radeon = radeon_decref(ctx->radeon); + free(ctx->draw); + free(ctx->bo); free(ctx->pm4); - ctx->cpm4 = 0; - ctx->pm4 = malloc(64 * 1024); - if (ctx->pm4 == NULL) - return -ENOMEM; - return 0; + free(ctx->reloc); + memset(ctx, 0, sizeof(*ctx)); + free(ctx); + return NULL; } static int radeon_ctx_state_bo(struct radeon_ctx *ctx, struct radeon_state *state) @@ -115,6 +143,7 @@ static int radeon_ctx_state_bo(struct radeon_ctx *ctx, struct radeon_state *stat return 0; } + int radeon_ctx_submit(struct radeon_ctx *ctx) { struct drm_radeon_cs drmib; @@ -141,7 +170,6 @@ int radeon_ctx_submit(struct radeon_ctx *ctx) r = drmCommandWriteRead(ctx->radeon->fd, DRM_RADEON_CS, &drmib, sizeof(struct drm_radeon_cs)); #endif - radeon_ctx_clear(ctx); return r; } @@ -149,6 +177,7 @@ static int radeon_ctx_reloc(struct radeon_ctx *ctx, struct radeon_bo *bo, unsigned id, unsigned *placement) { unsigned i; + struct radeon_cs_reloc *ptr; for (i = 0; i < ctx->nreloc; i++) { if (ctx->reloc[i].handle == bo->handle) { @@ -156,12 +185,14 @@ static int radeon_ctx_reloc(struct radeon_ctx *ctx, struct radeon_bo *bo, return 0; } } - if (ctx->nreloc >= 2048) - return -EINVAL; - ctx->reloc[ctx->nreloc].handle = bo->handle; - ctx->reloc[ctx->nreloc].read_domain = placement[0] | placement [1]; - ctx->reloc[ctx->nreloc].write_domain = placement[0] | placement [1]; - ctx->reloc[ctx->nreloc].flags = 0; + ptr = realloc(ctx->reloc, sizeof(struct radeon_cs_reloc) * (ctx->nreloc + 1)); + if (ptr == NULL) + return -ENOMEM; + ctx->reloc = ptr; + ptr[ctx->nreloc].handle = bo->handle; + ptr[ctx->nreloc].read_domain = placement[0] | placement [1]; + ptr[ctx->nreloc].write_domain = placement[0] | placement [1]; + ptr[ctx->nreloc].flags = 0; ctx->pm4[id] = ctx->nreloc * sizeof(struct radeon_cs_reloc) / 4; ctx->nreloc++; return 0; @@ -190,13 +221,21 @@ static int radeon_ctx_state_schedule(struct radeon_ctx *ctx, struct radeon_state return 0; } -int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw) +int radeon_ctx_set_draw_new(struct radeon_ctx *ctx, struct radeon_draw *draw) { - unsigned cpm4, i; + struct radeon_draw *pdraw = NULL; + struct radeon_draw **ndraw; + struct radeon_state *nstate, *ostate; + unsigned cpm4, i, cstate; + void *tmp; int r = 0; + ndraw = realloc(ctx->draw, sizeof(void*) * (ctx->ndraw + 1)); + if (ndraw == NULL) + return -ENOMEM; + ctx->draw = ndraw; for (i = 0; i < draw->nstate; i++) { - r = radeon_ctx_state_bo(ctx, &draw->state[i]); + r = radeon_ctx_state_bo(ctx, draw->state[i]); if (r) return r; } @@ -208,48 +247,69 @@ int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw) __func__, draw->cpm4, RADEON_CTX_MAX_PM4); return -EINVAL; } - ctx->draw[ctx->ndraw] = *draw; - for (i = 0, cpm4 = 0; i < draw->nstate - 1; i++) { - ctx->draw[ctx->ndraw].state[i].valid &= ~2; - if (ctx->draw[ctx->ndraw].state[i].valid) { - if (ctx->ndraw > 1 && ctx->draw[ctx->ndraw - 1].state[i].valid) { - if (ctx->draw[ctx->ndraw - 1].state[i].pm4_crc == draw->state[i].pm4_crc) - continue; + tmp = realloc(ctx->state, (ctx->nstate + draw->nstate) * sizeof(void*)); + if (tmp == NULL) + return -ENOMEM; + ctx->state = tmp; + pdraw = ctx->cdraw; + for (i = 0, cpm4 = 0, cstate = ctx->nstate; i < draw->nstate - 1; i++) { + nstate = draw->state[i]; + if (nstate) { + if (pdraw && pdraw->state[i]) { + ostate = pdraw->state[i]; + if (ostate->pm4_crc != nstate->pm4_crc) { + ctx->state[cstate++] = nstate; + cpm4 += nstate->cpm4; + } + } else { + ctx->state[cstate++] = nstate; + cpm4 += nstate->cpm4; } - ctx->draw[ctx->ndraw].state[i].valid |= 2; - cpm4 += ctx->draw[ctx->ndraw].state[i].cpm4; } } /* The last state is the draw state always add it */ - if (!draw->state[i].valid) { + if (draw->state[i] == NULL) { fprintf(stderr, "%s no draw command\n", __func__); return -EINVAL; } - ctx->draw[ctx->ndraw].state[i].valid |= 2; - cpm4 += ctx->draw[ctx->ndraw].state[i].cpm4; + ctx->state[cstate++] = draw->state[i]; + cpm4 += draw->state[i]->cpm4; if ((ctx->draw_cpm4 + cpm4) > RADEON_CTX_MAX_PM4) { /* need to flush */ return -EBUSY; } ctx->draw_cpm4 += cpm4; - ctx->ndraw++; + ctx->nstate = cstate; + ctx->draw[ctx->ndraw++] = draw; + ctx->cdraw = draw; return 0; } +int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw) +{ + int r; + + radeon_draw_incref(draw); + r = radeon_ctx_set_draw_new(ctx, draw); + if (r) + radeon_draw_decref(draw); + return r; +} + int radeon_ctx_pm4(struct radeon_ctx *ctx) { - unsigned i, j, c; + unsigned i; int r; - for (i = 0, c = 0, ctx->id = 0; i < ctx->ndraw; i++) { - for (j = 0; j < ctx->draw[i].nstate; j++) { - if (ctx->draw[i].state[j].valid & 2) { - r = radeon_ctx_state_schedule(ctx, &ctx->draw[i].state[j]); - if (r) - return r; - c += ctx->draw[i].state[j].cpm4; - } - } + free(ctx->pm4); + ctx->cpm4 = 0; + ctx->pm4 = malloc(ctx->draw_cpm4 * 4); + if (ctx->pm4 == NULL) + return -EINVAL; + for (i = 0, ctx->id = 0; i < ctx->nstate; i++) { + r = radeon_ctx_state_schedule(ctx, ctx->state[i]); + if (r) + return r; } if (ctx->id != ctx->draw_cpm4) { fprintf(stderr, "%s miss predicted pm4 size %d for %d\n", diff --git a/src/gallium/winsys/r600/drm/radeon_draw.c b/src/gallium/winsys/r600/drm/radeon_draw.c index 53699eb0b1..4413ed79fb 100644 --- a/src/gallium/winsys/r600/drm/radeon_draw.c +++ b/src/gallium/winsys/r600/drm/radeon_draw.c @@ -31,33 +31,111 @@ /* * draw functions */ -int radeon_draw_init(struct radeon_draw *draw, struct radeon *radeon) +struct radeon_draw *radeon_draw(struct radeon *radeon) { - memset(draw, 0, sizeof(struct radeon_draw)); + struct radeon_draw *draw; + + draw = calloc(1, sizeof(*draw)); + if (draw == NULL) + return NULL; draw->nstate = radeon->nstate; draw->radeon = radeon; - return 0; + draw->refcount = 1; + draw->state = calloc(1, sizeof(void*) * draw->nstate); + if (draw->state == NULL) { + free(draw); + return NULL; + } + return draw; } -int radeon_draw_set(struct radeon_draw *draw, struct radeon_state *state) +struct radeon_draw *radeon_draw_incref(struct radeon_draw *draw) +{ + draw->refcount++; + return draw; +} + +struct radeon_draw *radeon_draw_decref(struct radeon_draw *draw) +{ + unsigned i; + + if (draw == NULL) + return NULL; + if (--draw->refcount > 0) + return NULL; + for (i = 0; i < draw->nstate; i++) { + draw->state[i] = radeon_state_decref(draw->state[i]); + } + free(draw->state); + memset(draw, 0, sizeof(*draw)); + free(draw); + return NULL; +} + +int radeon_draw_set_new(struct radeon_draw *draw, struct radeon_state *state) { if (state == NULL) return 0; if (state->type >= draw->radeon->ntype) return -EINVAL; - draw->state[state->id] = *state; + draw->state[state->id] = radeon_state_decref(draw->state[state->id]); + draw->state[state->id] = state; return 0; } +int radeon_draw_set(struct radeon_draw *draw, struct radeon_state *state) +{ + if (state == NULL) + return 0; + radeon_state_incref(state); + return radeon_draw_set_new(draw, state); +} + int radeon_draw_check(struct radeon_draw *draw) { unsigned i; int r; + r = radeon_draw_pm4(draw); + if (r) + return r; for (i = 0, draw->cpm4 = 0; i < draw->nstate; i++) { - if (draw->state[i].valid) { - draw->cpm4 += draw->state[i].cpm4; + if (draw->state[i]) { + draw->cpm4 += draw->state[i]->cpm4; } } return 0; } + +struct radeon_draw *radeon_draw_duplicate(struct radeon_draw *draw) +{ + struct radeon_draw *ndraw; + unsigned i; + + if (draw == NULL) + return NULL; + ndraw = radeon_draw(draw->radeon); + if (ndraw == NULL) { + return NULL; + } + for (i = 0; i < draw->nstate; i++) { + if (radeon_draw_set(ndraw, draw->state[i])) { + radeon_draw_decref(ndraw); + return NULL; + } + } + return ndraw; +} + +int radeon_draw_pm4(struct radeon_draw *draw) +{ + unsigned i; + int r; + + for (i = 0; i < draw->nstate; i++) { + r = radeon_state_pm4(draw->state[i]); + if (r) + return r; + } + return 0; +} diff --git a/src/gallium/winsys/r600/drm/radeon_priv.h b/src/gallium/winsys/r600/drm/radeon_priv.h index 80392cda96..b91421f438 100644 --- a/src/gallium/winsys/r600/drm/radeon_priv.h +++ b/src/gallium/winsys/r600/drm/radeon_priv.h @@ -24,6 +24,7 @@ #include "radeon.h" struct radeon; +struct radeon_ctx; /* * radeon functions @@ -70,6 +71,34 @@ extern unsigned radeon_type_from_id(struct radeon *radeon, unsigned id); /* * radeon context functions */ +#pragma pack(1) +struct radeon_cs_reloc { + uint32_t handle; + uint32_t read_domain; + uint32_t write_domain; + uint32_t flags; +}; +#pragma pack() + +struct radeon_ctx { + int refcount; + struct radeon *radeon; + u32 *pm4; + u32 cpm4; + u32 draw_cpm4; + unsigned id; + unsigned next_id; + unsigned nreloc; + struct radeon_cs_reloc *reloc; + unsigned nbo; + struct radeon_bo **bo; + unsigned ndraw; + struct radeon_draw *cdraw; + struct radeon_draw **draw; + unsigned nstate; + struct radeon_state **state; +}; + int radeon_ctx_set_bo_new(struct radeon_ctx *ctx, struct radeon_bo *bo); struct radeon_bo *radeon_ctx_get_bo(struct radeon_ctx *ctx, unsigned reloc); void radeon_ctx_get_placement(struct radeon_ctx *ctx, unsigned reloc, u32 *placement); diff --git a/src/gallium/winsys/r600/drm/radeon_state.c b/src/gallium/winsys/r600/drm/radeon_state.c index d7cd1d7a94..308288557a 100644 --- a/src/gallium/winsys/r600/drm/radeon_state.c +++ b/src/gallium/winsys/r600/drm/radeon_state.c @@ -32,23 +32,82 @@ /* * state core functions */ -int radeon_state_init(struct radeon_state *state, struct radeon *radeon, u32 type, u32 id) +struct radeon_state *radeon_state(struct radeon *radeon, u32 type, u32 id) { + struct radeon_state *state; + if (type > radeon->ntype) { fprintf(stderr, "%s invalid type %d\n", __func__, type); - return -EINVAL; + return NULL; } if (id > radeon->nstate) { fprintf(stderr, "%s invalid state id %d\n", __func__, id); - return -EINVAL; + return NULL; } - memset(state, 0, sizeof(struct radeon_state)); + state = calloc(1, sizeof(*state)); + if (state == NULL) + return NULL; state->radeon = radeon; state->type = type; state->id = id; + state->refcount = 1; state->npm4 = radeon->type[type].npm4; state->nstates = radeon->type[type].nstates; - return 0; + state->states = calloc(1, state->nstates * 4); + state->pm4 = calloc(1, radeon->type[type].npm4 * 4); + if (state->states == NULL || state->pm4 == NULL) { + radeon_state_decref(state); + return NULL; + } + return state; +} + +struct radeon_state *radeon_state_duplicate(struct radeon_state *state) +{ + struct radeon_state *nstate = radeon_state(state->radeon, state->type, state->id); + unsigned i; + + if (state == NULL) + return NULL; + nstate->cpm4 = state->cpm4; + nstate->nbo = state->nbo; + nstate->nreloc = state->nreloc; + memcpy(nstate->states, state->states, state->nstates * 4); + memcpy(nstate->pm4, state->pm4, state->npm4 * 4); + memcpy(nstate->placement, state->placement, 8 * 4); + memcpy(nstate->reloc_pm4_id, state->reloc_pm4_id, 8 * 4); + memcpy(nstate->reloc_bo_id, state->reloc_bo_id, 8 * 4); + memcpy(nstate->bo_dirty, state->bo_dirty, 4 * 4); + for (i = 0; i < state->nbo; i++) { + nstate->bo[i] = radeon_bo_incref(state->radeon, state->bo[i]); + } + return nstate; +} + +struct radeon_state *radeon_state_incref(struct radeon_state *state) +{ + state->refcount++; + return state; +} + +struct radeon_state *radeon_state_decref(struct radeon_state *state) +{ + unsigned i; + + if (state == NULL) + return NULL; + if (--state->refcount > 0) { + return NULL; + } + for (i = 0; i < state->nbo; i++) { + state->bo[i] = radeon_bo_decref(state->radeon, state->bo[i]); + } + free(state->immd); + free(state->states); + free(state->pm4); + memset(state, 0, sizeof(*state)); + free(state); + return NULL; } int radeon_state_replace_always(struct radeon_state *ostate, @@ -97,7 +156,6 @@ int radeon_state_pm4(struct radeon_state *state) return r; } state->pm4_crc = crc32(state->pm4, state->cpm4 * 4); - state->valid = 1; return 0; } -- cgit v1.2.3 From 27041d7cb3faeaed483538a228573466363ec1c7 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 6 Aug 2010 11:28:33 -0400 Subject: r600g: fix color format, indentation, defines Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_state.c | 2 +- src/gallium/drivers/r600/r600_state_inlines.h | 161 +++++++++++++------------- src/gallium/drivers/r600/r600d.h | 19 +-- 3 files changed, 93 insertions(+), 89 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index e8871cd748..deb9bf3395 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -695,7 +695,7 @@ static struct radeon_state *r600_cb(struct r600_context *rctx, int cb) ntype = 0; desc = util_format_description(rtex->resource.base.b.format); if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) - ntype = NUM_FORMAT_SRGB; + ntype = V_0280A0_NUMBER_SRGB; format = r600_translate_colorformat(rtex->resource.base.b.format); swap = r600_translate_colorswap(rtex->resource.base.b.format); diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index fdc29386ae..8271ad19fb 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -147,117 +147,117 @@ static uint32_t r600_translate_colorswap(enum pipe_format format) { switch (format) { /* 8-bit buffers. */ - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_I8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R8_SNORM: - return SWAP_STD; + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8_SNORM: + return V_0280A0_SWAP_STD; /* 16-bit buffers. */ - case PIPE_FORMAT_B5G6R5_UNORM: - return SWAP_STD_REV; + case PIPE_FORMAT_B5G6R5_UNORM: + return V_0280A0_SWAP_STD_REV; - case PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_B5G5R5X1_UNORM: - return SWAP_ALT; + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B5G5R5X1_UNORM: + return V_0280A0_SWAP_ALT; - case PIPE_FORMAT_B4G4R4A4_UNORM: - case PIPE_FORMAT_B4G4R4X4_UNORM: - return SWAP_ALT; + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B4G4R4X4_UNORM: + return V_0280A0_SWAP_ALT; /* 32-bit buffers. */ - case PIPE_FORMAT_A8B8G8R8_SRGB: - return SWAP_STD_REV; - case PIPE_FORMAT_B8G8R8A8_SRGB: - return SWAP_ALT; + case PIPE_FORMAT_A8B8G8R8_SRGB: + return V_0280A0_SWAP_STD_REV; + case PIPE_FORMAT_B8G8R8A8_SRGB: + return V_0280A0_SWAP_ALT; - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - return SWAP_ALT; + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + return V_0280A0_SWAP_ALT; - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_X8R8G8B8_UNORM: - return SWAP_ALT_REV; - case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_R8G8B8X8_UNORM: - return SWAP_STD; + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + return V_0280A0_SWAP_ALT_REV; + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + return V_0280A0_SWAP_STD; - case PIPE_FORMAT_A8B8G8R8_UNORM: - case PIPE_FORMAT_X8B8G8R8_UNORM: -// case PIPE_FORMAT_R8SG8SB8UX8U_NORM: - return SWAP_STD_REV; + case PIPE_FORMAT_A8B8G8R8_UNORM: + case PIPE_FORMAT_X8B8G8R8_UNORM: + // case PIPE_FORMAT_R8SG8SB8UX8U_NORM: + return V_0280A0_SWAP_STD_REV; case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - return SWAP_STD; + return V_0280A0_SWAP_STD; - case PIPE_FORMAT_R10G10B10A2_UNORM: - case PIPE_FORMAT_R10G10B10X2_SNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: - case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return SWAP_STD_REV; + case PIPE_FORMAT_R10G10B10A2_UNORM: + case PIPE_FORMAT_R10G10B10X2_SNORM: + case PIPE_FORMAT_B10G10R10A2_UNORM: + case PIPE_FORMAT_R10SG10SB10SA2U_NORM: + return V_0280A0_SWAP_STD_REV; /* 64-bit buffers. */ - case PIPE_FORMAT_R16G16B16A16_UNORM: - case PIPE_FORMAT_R16G16B16A16_SNORM: -// return V_0280A0_COLOR_16_16_16_16; - case PIPE_FORMAT_R16G16B16A16_FLOAT: -// return V_0280A0_COLOR_16_16_16_16_FLOAT; + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + // return V_0280A0_COLOR_16_16_16_16; + case PIPE_FORMAT_R16G16B16A16_FLOAT: + // return V_0280A0_COLOR_16_16_16_16_FLOAT; /* 128-bit buffers. */ - case PIPE_FORMAT_R32G32B32A32_FLOAT: -// return V_0280A0_COLOR_32_32_32_32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + // return V_0280A0_COLOR_32_32_32_32_FLOAT; return 0; default: R600_ERR("unsupported colorswap format %d\n", format); return ~0; } return ~0; - } static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) { switch (format) { /* 8-bit buffers. */ - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_I8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R8_SNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8_SNORM: return V_0280A0_COLOR_8; /* 16-bit buffers. */ - case PIPE_FORMAT_B5G6R5_UNORM: + case PIPE_FORMAT_B5G6R5_UNORM: return V_0280A0_COLOR_5_6_5; - case PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_B5G5R5X1_UNORM: + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B5G5R5X1_UNORM: return V_0280A0_COLOR_1_5_5_5; - case PIPE_FORMAT_B4G4R4A4_UNORM: - case PIPE_FORMAT_B4G4R4X4_UNORM: + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B4G4R4X4_UNORM: return V_0280A0_COLOR_4_4_4_4; /* 32-bit buffers. */ - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_X8R8G8B8_UNORM: - case PIPE_FORMAT_A8B8G8R8_UNORM: - case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_X8B8G8R8_UNORM: - case PIPE_FORMAT_R8G8B8X8_UNORM: - case PIPE_FORMAT_R8SG8SB8UX8U_NORM: - case PIPE_FORMAT_A8B8G8R8_SRGB: - case PIPE_FORMAT_B8G8R8A8_SRGB: + case PIPE_FORMAT_A8B8G8R8_SRGB: + case PIPE_FORMAT_A8B8G8R8_UNORM: + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_SRGB: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + case PIPE_FORMAT_R8SG8SB8UX8U_NORM: + case PIPE_FORMAT_X8B8G8R8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: return V_0280A0_COLOR_8_8_8_8; - case PIPE_FORMAT_R10G10B10A2_UNORM: - case PIPE_FORMAT_R10G10B10X2_SNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: - case PIPE_FORMAT_R10SG10SB10SA2U_NORM: + case PIPE_FORMAT_R10G10B10A2_UNORM: + case PIPE_FORMAT_R10G10B10X2_SNORM: + case PIPE_FORMAT_B10G10R10A2_UNORM: + case PIPE_FORMAT_R10SG10SB10SA2U_NORM: return V_0280A0_COLOR_10_10_10_2; case PIPE_FORMAT_Z24X8_UNORM: @@ -265,25 +265,24 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) return V_0280A0_COLOR_24_8; /* 64-bit buffers. */ - case PIPE_FORMAT_R16G16B16A16_UNORM: - case PIPE_FORMAT_R16G16B16A16_SNORM: + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: return V_0280A0_COLOR_16_16_16_16; - case PIPE_FORMAT_R16G16B16A16_FLOAT: + case PIPE_FORMAT_R16G16B16A16_FLOAT: return V_0280A0_COLOR_16_16_16_16_FLOAT; - case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R32G32_FLOAT: return V_0280A0_COLOR_32_32_FLOAT; /* 128-bit buffers. */ - case PIPE_FORMAT_R32G32B32_FLOAT: - case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_FLOAT: return V_0280A0_COLOR_32_32_32_32_FLOAT; /* YUV buffers. */ - case PIPE_FORMAT_UYVY: -// return R300_COLOR_FORMAT_YVYU; - case PIPE_FORMAT_YUYV: -// return R300_COLOR_FORMAT_VYUY; - default: + case PIPE_FORMAT_UYVY: + case PIPE_FORMAT_YUYV: + default: + R600_ERR("unsupported color format %d\n", format); return ~0; /* Unsupported. */ } } diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index f9cad93185..fb71b1e5d1 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -209,12 +209,24 @@ #define S_0280A0_NUMBER_TYPE(x) (((x) & 0x7) << 12) #define G_0280A0_NUMBER_TYPE(x) (((x) >> 12) & 0x7) #define C_0280A0_NUMBER_TYPE 0xFFFF8FFF +#define V_0280A0_NUMBER_UNORM 0x00000000 +#define V_0280A0_NUMBER_SNORM 0x00000001 +#define V_0280A0_NUMBER_USCALED 0x00000002 +#define V_0280A0_NUMBER_SSCALED 0x00000003 +#define V_0280A0_NUMBER_UINT 0x00000004 +#define V_0280A0_NUMBER_SINT 0x00000005 +#define V_0280A0_NUMBER_SRGB 0x00000006 +#define V_0280A0_NUMBER_FLOAT 0x00000007 #define S_0280A0_READ_SIZE(x) (((x) & 0x1) << 15) #define G_0280A0_READ_SIZE(x) (((x) >> 15) & 0x1) #define C_0280A0_READ_SIZE 0xFFFF7FFF #define S_0280A0_COMP_SWAP(x) (((x) & 0x3) << 16) #define G_0280A0_COMP_SWAP(x) (((x) >> 16) & 0x3) #define C_0280A0_COMP_SWAP 0xFFFCFFFF +#define V_0280A0_SWAP_STD 0x00000000 +#define V_0280A0_SWAP_ALT 0x00000001 +#define V_0280A0_SWAP_STD_REV 0x00000002 +#define V_0280A0_SWAP_ALT_REV 0x00000003 #define S_0280A0_TILE_MODE(x) (((x) & 0x3) << 18) #define G_0280A0_TILE_MODE(x) (((x) >> 18) & 0x3) #define C_0280A0_TILE_MODE 0xFFF3FFFF @@ -1169,11 +1181,4 @@ #define G_0286D4_PNT_SPRITE_TOP_1(x) (((x) >> 14) & 0x1) #define C_0286D4_PNT_SPRITE_TOP_1 0xFFFFBFFF -/* temporary swap */ -#define SWAP_STD 0 -#define SWAP_ALT 1 -#define SWAP_STD_REV 2 -#define SWAP_ALT_REV 3 - -#define NUM_FORMAT_SRGB 6 #endif -- cgit v1.2.3 From c3ad060488ffd98f1c6dc9127b46324c5201f434 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 6 Aug 2010 16:10:25 -0400 Subject: r600g: finish multi target rendering support Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_context.c | 6 +- src/gallium/drivers/r600/r600_shader.c | 52 ++++++++++------- src/gallium/drivers/r600/r600_shader.h | 9 +++ src/gallium/drivers/r600/r600_state.c | 3 +- src/gallium/drivers/r600/radeon.h | 30 +++++++--- src/gallium/winsys/r600/drm/r600_states.h | 96 +++++++++++++++++++++++++++++-- 6 files changed, 158 insertions(+), 38 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index ae1780a1d4..29dc93bae6 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -47,14 +47,16 @@ void r600_flush(struct pipe_context *ctx, unsigned flags, struct r600_context *rctx = r600_context(ctx); struct r600_screen *rscreen = rctx->screen; static int dc = 0; + char dname[256]; if (radeon_ctx_pm4(rctx->ctx)) return; /* FIXME dumping should be removed once shader support instructions * without throwing bad code */ - if (!dc) - radeon_ctx_dump_bof(rctx->ctx, "gallium.bof"); + sprintf(dname, "gallium-%08d.bof", dc); + if (dc < 10) + radeon_ctx_dump_bof(rctx->ctx, dname); #if 1 radeon_ctx_submit(rctx->ctx); #endif diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index dc8d4cb315..33dff97d22 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -339,7 +339,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s { struct tgsi_full_immediate *immediate; struct r600_shader_ctx ctx; - struct r600_bc_output output; + struct r600_bc_output output[32]; unsigned opcode; int i, r = 0, pos0; @@ -418,33 +418,37 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s } /* export output */ for (i = 0, pos0 = 0; i < shader->noutput; i++) { - memset(&output, 0, sizeof(struct r600_bc_output)); - output.gpr = shader->output[i].gpr; - output.elem_size = 3; - output.swizzle_x = 0; - output.swizzle_y = 1; - output.swizzle_z = 2; - output.swizzle_w = 3; - output.barrier = 1; - output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; - output.array_base = i - pos0; - output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE; + memset(&output[i], 0, sizeof(struct r600_bc_output)); + output[i].gpr = shader->output[i].gpr; + output[i].elem_size = 3; + output[i].swizzle_x = 0; + output[i].swizzle_y = 1; + output[i].swizzle_z = 2; + output[i].swizzle_w = 3; + output[i].barrier = 1; + output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; + output[i].array_base = i - pos0; + output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT; switch (ctx.type == TGSI_PROCESSOR_VERTEX) { case TGSI_PROCESSOR_VERTEX: + shader->output[i].type = r600_export_parameter; if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { - output.array_base = 60; - output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + shader->output[i].type = r600_export_position; + output[i].array_base = 60; + output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; /* position doesn't count in array_base */ pos0 = 1; } break; case TGSI_PROCESSOR_FRAGMENT: + shader->output[i].type = r600_export_framebuffer; if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { - output.array_base = 0; - output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + output[i].array_base = 0; + output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { - output.array_base = 61; - output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + shader->output[i].type = r600_export_position; + output[i].array_base = 61; + output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else { R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); r = -EINVAL; @@ -457,9 +461,17 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s goto out_err; } if (i == (shader->noutput - 1)) { - output.end_of_program = 1; + output[i].end_of_program = 1; } - r = r600_bc_add_output(ctx.bc, &output); + } + for (i = shader->noutput - 1, shader->output_done = 0; i >= 0; i--) { + if (!(shader->output_done & (1 << output[i].type))) { + shader->output_done |= (1 << output[i].type); + output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE; + } + } + for (i = 0; i < shader->noutput; i++) { + r = r600_bc_add_output(ctx.bc, &output[i]); if (r) goto out_err; } diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index ee0381e8bd..15562c19a5 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -25,9 +25,17 @@ #include "r600_asm.h" +enum r600_export_type { + r600_export_position = 0, + r600_export_parameter, + r600_export_framebuffer, +}; + struct r600_shader_io { unsigned name; unsigned gpr; + unsigned done; + unsigned type; int sid; unsigned interpolate; }; @@ -41,6 +49,7 @@ struct r600_shader { struct r600_shader_io input[32]; struct r600_shader_io output[32]; enum radeon_family family; + unsigned output_done; }; #endif diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index deb9bf3395..ef6c1bedeb 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -675,9 +675,8 @@ static struct radeon_state *r600_cb(struct r600_context *rctx, int cb) unsigned color_info; unsigned format, swap, ntype; const struct util_format_description *desc; - int id = R600_CB0 + cb; - rstate = radeon_state(rscreen->rw, R600_CB0_TYPE, id); + rstate = radeon_state(rscreen->rw, R600_CB0_TYPE + cb, R600_CB0 + cb); if (rstate == NULL) return NULL; rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; diff --git a/src/gallium/drivers/r600/radeon.h b/src/gallium/drivers/r600/radeon.h index 3a8405f9b4..00cff41b4f 100644 --- a/src/gallium/drivers/r600/radeon.h +++ b/src/gallium/drivers/r600/radeon.h @@ -160,8 +160,8 @@ void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file); * R600/R700 */ -#define R600_NSTATE 1273 -#define R600_NTYPE 25 +#define R600_NSTATE 1280 +#define R600_NTYPE 32 #define R600_CONFIG 0 #define R600_CONFIG_TYPE 0 @@ -207,12 +207,26 @@ void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file); #define R600_GS_SAMPLER_BORDER_TYPE 20 #define R600_CB0 1269 #define R600_CB0_TYPE 21 -#define R600_DB 1270 -#define R600_DB_TYPE 22 -#define R600_VGT 1271 -#define R600_VGT_TYPE 23 -#define R600_DRAW 1272 -#define R600_DRAW_TYPE 24 +#define R600_CB1 1270 +#define R600_CB1_TYPE 22 +#define R600_CB2 1271 +#define R600_CB2_TYPE 23 +#define R600_CB3 1272 +#define R600_CB3_TYPE 24 +#define R600_CB4 1273 +#define R600_CB4_TYPE 25 +#define R600_CB5 1274 +#define R600_CB5_TYPE 26 +#define R600_CB6 1275 +#define R600_CB6_TYPE 27 +#define R600_CB7 1276 +#define R600_CB7_TYPE 28 +#define R600_DB 1277 +#define R600_DB_TYPE 29 +#define R600_VGT 1278 +#define R600_VGT_TYPE 30 +#define R600_DRAW 1279 +#define R600_DRAW_TYPE 31 /* R600_CONFIG */ #define R600_CONFIG__SQ_CONFIG 0 #define R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1 1 diff --git a/src/gallium/winsys/r600/drm/r600_states.h b/src/gallium/winsys/r600/drm/r600_states.h index 5896df21b2..e40c77d8f6 100644 --- a/src/gallium/winsys/r600/drm/r600_states.h +++ b/src/gallium/winsys/r600/drm/r600_states.h @@ -372,6 +372,76 @@ static const struct radeon_register R600_CB0_names[] = { {0x00028100, 0, 0, "CB_COLOR0_MASK"}, }; +static const struct radeon_register R600_CB1_names[] = { + {0x00028044, 1, 0, "CB_COLOR1_BASE"}, + {0x000280A4, 0, 0, "CB_COLOR1_INFO"}, + {0x00028064, 0, 0, "CB_COLOR1_SIZE"}, + {0x00028084, 0, 0, "CB_COLOR1_VIEW"}, + {0x000280E4, 1, 1, "CB_COLOR1_FRAG"}, + {0x000280C4, 1, 2, "CB_COLOR1_TILE"}, + {0x00028104, 0, 0, "CB_COLOR1_MASK"}, +}; + +static const struct radeon_register R600_CB2_names[] = { + {0x00028048, 1, 0, "CB_COLOR2_BASE"}, + {0x000280A8, 0, 0, "CB_COLOR2_INFO"}, + {0x00028068, 0, 0, "CB_COLOR2_SIZE"}, + {0x00028088, 0, 0, "CB_COLOR2_VIEW"}, + {0x000280E8, 1, 1, "CB_COLOR2_FRAG"}, + {0x000280C8, 1, 2, "CB_COLOR2_TILE"}, + {0x00028108, 0, 0, "CB_COLOR2_MASK"}, +}; + +static const struct radeon_register R600_CB3_names[] = { + {0x0002804C, 1, 0, "CB_COLOR3_BASE"}, + {0x000280AC, 0, 0, "CB_COLOR3_INFO"}, + {0x0002806C, 0, 0, "CB_COLOR3_SIZE"}, + {0x0002808C, 0, 0, "CB_COLOR3_VIEW"}, + {0x000280EC, 1, 1, "CB_COLOR3_FRAG"}, + {0x000280CC, 1, 2, "CB_COLOR3_TILE"}, + {0x0002810C, 0, 0, "CB_COLOR3_MASK"}, +}; + +static const struct radeon_register R600_CB4_names[] = { + {0x00028050, 1, 0, "CB_COLOR4_BASE"}, + {0x000280B0, 0, 0, "CB_COLOR4_INFO"}, + {0x00028070, 0, 0, "CB_COLOR4_SIZE"}, + {0x00028090, 0, 0, "CB_COLOR4_VIEW"}, + {0x000280F0, 1, 1, "CB_COLOR4_FRAG"}, + {0x000280D0, 1, 2, "CB_COLOR4_TILE"}, + {0x00028110, 0, 0, "CB_COLOR4_MASK"}, +}; + +static const struct radeon_register R600_CB5_names[] = { + {0x00028054, 1, 0, "CB_COLOR5_BASE"}, + {0x000280B4, 0, 0, "CB_COLOR5_INFO"}, + {0x00028074, 0, 0, "CB_COLOR5_SIZE"}, + {0x00028094, 0, 0, "CB_COLOR5_VIEW"}, + {0x000280F4, 1, 1, "CB_COLOR5_FRAG"}, + {0x000280D4, 1, 2, "CB_COLOR5_TILE"}, + {0x00028114, 0, 0, "CB_COLOR5_MASK"}, +}; + +static const struct radeon_register R600_CB6_names[] = { + {0x00028058, 1, 0, "CB_COLOR6_BASE"}, + {0x000280B8, 0, 0, "CB_COLOR6_INFO"}, + {0x00028078, 0, 0, "CB_COLOR6_SIZE"}, + {0x00028098, 0, 0, "CB_COLOR6_VIEW"}, + {0x000280F8, 1, 1, "CB_COLOR6_FRAG"}, + {0x000280D8, 1, 2, "CB_COLOR6_TILE"}, + {0x00028118, 0, 0, "CB_COLOR6_MASK"}, +}; + +static const struct radeon_register R600_CB7_names[] = { + {0x0002805C, 1, 0, "CB_COLOR7_BASE"}, + {0x000280BC, 0, 0, "CB_COLOR7_INFO"}, + {0x0002807C, 0, 0, "CB_COLOR7_SIZE"}, + {0x0002809C, 0, 0, "CB_COLOR7_VIEW"}, + {0x000280FC, 1, 1, "CB_COLOR7_FRAG"}, + {0x000280DC, 1, 2, "CB_COLOR7_TILE"}, + {0x0002811C, 0, 0, "CB_COLOR7_MASK"}, +}; + static const struct radeon_register R600_DB_names[] = { {0x0002800C, 1, 0, "DB_DEPTH_BASE"}, {0x00028000, 0, 0, "DB_DEPTH_SIZE"}, @@ -425,9 +495,16 @@ static struct radeon_type R600_types[] = { { 128, 1233, 0x0000A600, 0x0000A720, 0x0010, 0, "R600_VS_SAMPLER_BORDER", 4, r600_state_pm4_generic, R600_VS_SAMPLER_BORDER_names}, { 128, 1251, 0x0000A800, 0x0000A920, 0x0010, 0, "R600_GS_SAMPLER_BORDER", 4, r600_state_pm4_generic, R600_GS_SAMPLER_BORDER_names}, { 128, 1269, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB0", 7, r600_state_pm4_cb0, R600_CB0_names}, - { 128, 1270, 0x00000000, 0x00000000, 0x0000, 0, "R600_DB", 6, r600_state_pm4_db, R600_DB_names}, - { 128, 1271, 0x00000000, 0x00000000, 0x0000, 0, "R600_VGT", 11, r600_state_pm4_vgt, R600_VGT_names}, - { 128, 1272, 0x00000000, 0x00000000, 0x0000, 0, "R600_DRAW", 4, r600_state_pm4_draw, R600_DRAW_names}, + { 128, 1270, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB1", 7, r600_state_pm4_cb0, R600_CB1_names}, + { 128, 1271, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB2", 7, r600_state_pm4_cb0, R600_CB2_names}, + { 128, 1272, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB3", 7, r600_state_pm4_cb0, R600_CB3_names}, + { 128, 1273, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB4", 7, r600_state_pm4_cb0, R600_CB4_names}, + { 128, 1274, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB5", 7, r600_state_pm4_cb0, R600_CB5_names}, + { 128, 1275, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB6", 7, r600_state_pm4_cb0, R600_CB6_names}, + { 128, 1276, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB7", 7, r600_state_pm4_cb0, R600_CB7_names}, + { 128, 1277, 0x00000000, 0x00000000, 0x0000, 0, "R600_DB", 6, r600_state_pm4_db, R600_DB_names}, + { 128, 1278, 0x00000000, 0x00000000, 0x0000, 0, "R600_VGT", 11, r600_state_pm4_vgt, R600_VGT_names}, + { 128, 1279, 0x00000000, 0x00000000, 0x0000, 0, "R600_DRAW", 4, r600_state_pm4_draw, R600_DRAW_names}, }; static struct radeon_type R700_types[] = { @@ -453,9 +530,16 @@ static struct radeon_type R700_types[] = { { 128, 1233, 0x0000A600, 0x0000A720, 0x0010, 0, "R600_VS_SAMPLER_BORDER", 4, r600_state_pm4_generic, R600_VS_SAMPLER_BORDER_names}, { 128, 1251, 0x0000A800, 0x0000A920, 0x0010, 0, "R600_GS_SAMPLER_BORDER", 4, r600_state_pm4_generic, R600_GS_SAMPLER_BORDER_names}, { 128, 1269, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB0", 7, r700_state_pm4_cb0, R600_CB0_names}, - { 128, 1270, 0x00000000, 0x00000000, 0x0000, 0, "R600_DB", 6, r700_state_pm4_db, R600_DB_names}, - { 128, 1271, 0x00000000, 0x00000000, 0x0000, 0, "R600_VGT", 11, r600_state_pm4_vgt, R600_VGT_names}, - { 128, 1272, 0x00000000, 0x00000000, 0x0000, 0, "R600_DRAW", 4, r600_state_pm4_draw, R600_DRAW_names}, + { 128, 1270, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB1", 7, r600_state_pm4_cb0, R600_CB1_names}, + { 128, 1271, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB2", 7, r600_state_pm4_cb0, R600_CB2_names}, + { 128, 1272, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB3", 7, r600_state_pm4_cb0, R600_CB3_names}, + { 128, 1273, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB4", 7, r600_state_pm4_cb0, R600_CB4_names}, + { 128, 1274, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB5", 7, r600_state_pm4_cb0, R600_CB5_names}, + { 128, 1275, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB6", 7, r600_state_pm4_cb0, R600_CB6_names}, + { 128, 1276, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB7", 7, r600_state_pm4_cb0, R600_CB7_names}, + { 128, 1277, 0x00000000, 0x00000000, 0x0000, 0, "R600_DB", 6, r700_state_pm4_db, R600_DB_names}, + { 128, 1278, 0x00000000, 0x00000000, 0x0000, 0, "R600_VGT", 11, r600_state_pm4_vgt, R600_VGT_names}, + { 128, 1279, 0x00000000, 0x00000000, 0x0000, 0, "R600_DRAW", 4, r600_state_pm4_draw, R600_DRAW_names}, }; #endif -- cgit v1.2.3 From 14e9fbee1cef281c6849a5f2a6d2cc66bfd4b3fd Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 6 Aug 2010 15:09:41 -0600 Subject: gallium: remove stray semicolons --- src/gallium/auxiliary/gallivm/lp_bld_init.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_sanity.c | 2 +- src/gallium/auxiliary/util/u_cpu_detect.c | 2 +- src/gallium/drivers/llvmpipe/lp_context.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index ef0888079c..60d8bcfa55 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -46,7 +46,7 @@ static const struct debug_named_value lp_bld_debug_flags[] = { DEBUG_NAMED_VALUE_END }; -DEBUG_GET_ONCE_FLAGS_OPTION(gallivm_debug, "GALLIVM_DEBUG", lp_bld_debug_flags, 0); +DEBUG_GET_ONCE_FLAGS_OPTION(gallivm_debug, "GALLIVM_DEBUG", lp_bld_debug_flags, 0) #endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 9e02040f6c..287ee006cf 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -34,7 +34,7 @@ #include "tgsi_iterate.h" -DEBUG_GET_ONCE_BOOL_OPTION(print_sanity, "TGSI_PRINT_SANITY", TRUE); +DEBUG_GET_ONCE_BOOL_OPTION(print_sanity, "TGSI_PRINT_SANITY", TRUE) typedef struct { diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c index 6f38d22285..b1a8c75b99 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -73,7 +73,7 @@ #endif -DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", TRUE); +DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", TRUE) struct util_cpu_caps util_cpu_caps; diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 28793682ed..7543bd7b2b 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -47,7 +47,7 @@ #include "lp_setup.h" -DEBUG_GET_ONCE_BOOL_OPTION(lp_no_rast, "LP_NO_RAST", FALSE); +DEBUG_GET_ONCE_BOOL_OPTION(lp_no_rast, "LP_NO_RAST", FALSE) static void llvmpipe_destroy( struct pipe_context *pipe ) -- cgit v1.2.3 From b474478f206c6d81af78696d3d5ce156d4d413d7 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 6 Aug 2010 17:12:37 -0400 Subject: r600g: really fix multi target support Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_context.c | 20 +------------------- src/gallium/drivers/r600/r600_shader.c | 19 ++++++++----------- src/gallium/drivers/r600/r600_shader.h | 8 -------- src/gallium/drivers/r600/r600_state.c | 25 +++++++++++++++++-------- 4 files changed, 26 insertions(+), 46 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index 29dc93bae6..052eb1cd6d 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -55,7 +55,7 @@ void r600_flush(struct pipe_context *ctx, unsigned flags, * without throwing bad code */ sprintf(dname, "gallium-%08d.bof", dc); - if (dc < 10) + if (dc < 1) radeon_ctx_dump_bof(rctx->ctx, dname); #if 1 radeon_ctx_submit(rctx->ctx); @@ -204,24 +204,6 @@ static void r600_init_config(struct r600_context *rctx) num_es_stack_entries = 0; break; } - printf("ps_prio : %d\n", ps_prio); - printf("vs_prio : %d\n", vs_prio); - printf("gs_prio : %d\n", gs_prio); - printf("es_prio : %d\n", es_prio); - printf("num_ps_gprs : %d\n", num_ps_gprs); - printf("num_vs_gprs : %d\n", num_vs_gprs); - printf("num_gs_gprs : %d\n", num_gs_gprs); - printf("num_es_gprs : %d\n", num_es_gprs); - printf("num_temp_gprs : %d\n", num_temp_gprs); - printf("num_ps_threads : %d\n", num_ps_threads); - printf("num_vs_threads : %d\n", num_vs_threads); - printf("num_gs_threads : %d\n", num_gs_threads); - printf("num_es_threads : %d\n", num_es_threads); - printf("num_ps_stack_entries : %d\n", num_ps_stack_entries); - printf("num_vs_stack_entries : %d\n", num_vs_stack_entries); - printf("num_gs_stack_entries : %d\n", num_gs_stack_entries); - printf("num_es_stack_entries : %d\n", num_es_stack_entries); - rctx->hw_states.config = radeon_state(rctx->rw, R600_CONFIG_TYPE, R600_CONFIG); rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] = 0x00000000; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 33dff97d22..8a778f5fd6 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -105,8 +105,8 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_screen *rscreen = r600_screen(ctx->screen); int r; -fprintf(stderr, "--------------------------------------------------------------\n"); -tgsi_dump(tokens, 0); +//fprintf(stderr, "--------------------------------------------------------------\n"); +//tgsi_dump(tokens, 0); if (rpshader == NULL) return -ENOMEM; rpshader->shader.family = radeon_get_family(rscreen->rw); @@ -120,7 +120,7 @@ tgsi_dump(tokens, 0); R600_ERR("building bytecode failed !\n"); return r; } -fprintf(stderr, "______________________________________________________________\n"); +//fprintf(stderr, "______________________________________________________________\n"); return 0; } @@ -340,6 +340,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s struct tgsi_full_immediate *immediate; struct r600_shader_ctx ctx; struct r600_bc_output output[32]; + unsigned output_done; unsigned opcode; int i, r = 0, pos0; @@ -431,9 +432,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT; switch (ctx.type == TGSI_PROCESSOR_VERTEX) { case TGSI_PROCESSOR_VERTEX: - shader->output[i].type = r600_export_parameter; if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { - shader->output[i].type = r600_export_position; output[i].array_base = 60; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; /* position doesn't count in array_base */ @@ -441,12 +440,10 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s } break; case TGSI_PROCESSOR_FRAGMENT: - shader->output[i].type = r600_export_framebuffer; if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { - output[i].array_base = 0; + output[i].array_base = shader->output[i].sid; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { - shader->output[i].type = r600_export_position; output[i].array_base = 61; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else { @@ -464,9 +461,9 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[i].end_of_program = 1; } } - for (i = shader->noutput - 1, shader->output_done = 0; i >= 0; i--) { - if (!(shader->output_done & (1 << output[i].type))) { - shader->output_done |= (1 << output[i].type); + for (i = shader->noutput - 1, output_done = 0; i >= 0; i--) { + if (!(output_done & (1 << output[i].type))) { + output_done |= (1 << output[i].type); output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE; } } diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 15562c19a5..2ee7780ead 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -25,17 +25,10 @@ #include "r600_asm.h" -enum r600_export_type { - r600_export_position = 0, - r600_export_parameter, - r600_export_framebuffer, -}; - struct r600_shader_io { unsigned name; unsigned gpr; unsigned done; - unsigned type; int sid; unsigned interpolate; }; @@ -49,7 +42,6 @@ struct r600_shader { struct r600_shader_io input[32]; struct r600_shader_io output[32]; enum radeon_family family; - unsigned output_done; }; #endif diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index ef6c1bedeb..223f2f3900 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1205,7 +1205,7 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) struct r600_screen *rscreen = rctx->screen; struct radeon_state *rstate; const struct pipe_blend_state *pbs = &rctx->blend->state.blend; - int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs; + int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs; uint32_t color_control, target_mask, shader_mask; int i; @@ -1214,20 +1214,29 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) color_control = S_028808_PER_MRT_BLEND(1); for (i = 0; i < nr_cbufs; i++) { - shader_mask |= 0xf << i; + shader_mask |= 0xf << (i * 4); } if (pbs->logicop_enable) { color_control |= (pbs->logicop_func) << 16; - } else + } else { color_control |= (0xcc << 16); + } - for (i = 0; i < 8; i++) { - if (pbs->rt[i].blend_enable) { - color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); + if (pbs->independent_blend_enable) { + for (i = 0; i < 8; i++) { + if (pbs->rt[i].blend_enable) { + color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); + } + target_mask |= (pbs->rt[i].colormask << (4 * i)); + } + } else { + for (i = 0; i < 8; i++) { + if (pbs->rt[0].blend_enable) { + color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); + } + target_mask |= (pbs->rt[0].colormask << (4 * i)); } - target_mask |= (pbs->rt[i].colormask << (4 * i)); - } rstate = radeon_state(rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = shader_mask; -- cgit v1.2.3 From 32251c34f06ef91759fa75271ce724a06483cc42 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 6 Aug 2010 17:22:45 -0400 Subject: r600g: fix rendering, only enable target we write too Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 223f2f3900..ff621084d4 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1224,14 +1224,14 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) } if (pbs->independent_blend_enable) { - for (i = 0; i < 8; i++) { + for (i = 0; i < nr_cbufs; i++) { if (pbs->rt[i].blend_enable) { color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); } target_mask |= (pbs->rt[i].colormask << (4 * i)); } } else { - for (i = 0; i < 8; i++) { + for (i = 0; i < nr_cbufs; i++) { if (pbs->rt[0].blend_enable) { color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); } -- cgit v1.2.3 From d9f72b9f909b32ff0adacf939c75eb2924ed133b Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 6 Aug 2010 17:27:36 -0400 Subject: r600g: add PA_CL_CLIP_CNTL definition Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600d.h | 55 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index fb71b1e5d1..af93731550 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -352,6 +352,61 @@ #define S_028808_ROP3(x) (((x) & 0xFF) << 16) #define G_028808_ROP3(x) (((x) >> 16) & 0xFF) #define C_028808_ROP3 0xFF00FFFF +#define R_028810_PA_CL_CLIP_CNTL 0x028810 +#define S_028810_UCP_ENA_0(x) (((x) & 0x1) << 0) +#define G_028810_UCP_ENA_0(x) (((x) >> 0) & 0x1) +#define C_028810_UCP_ENA_0 0xFFFFFFFE +#define S_028810_UCP_ENA_1(x) (((x) & 0x1) << 1) +#define G_028810_UCP_ENA_1(x) (((x) >> 1) & 0x1) +#define C_028810_UCP_ENA_1 0xFFFFFFFD +#define S_028810_UCP_ENA_2(x) (((x) & 0x1) << 2) +#define G_028810_UCP_ENA_2(x) (((x) >> 2) & 0x1) +#define C_028810_UCP_ENA_2 0xFFFFFFFB +#define S_028810_UCP_ENA_3(x) (((x) & 0x1) << 3) +#define G_028810_UCP_ENA_3(x) (((x) >> 3) & 0x1) +#define C_028810_UCP_ENA_3 0xFFFFFFF7 +#define S_028810_UCP_ENA_4(x) (((x) & 0x1) << 4) +#define G_028810_UCP_ENA_4(x) (((x) >> 4) & 0x1) +#define C_028810_UCP_ENA_4 0xFFFFFFEF +#define S_028810_UCP_ENA_5(x) (((x) & 0x1) << 5) +#define G_028810_UCP_ENA_5(x) (((x) >> 5) & 0x1) +#define C_028810_UCP_ENA_5 0xFFFFFFDF +#define S_028810_PS_UCP_Y_SCALE_NEG(x) (((x) & 0x1) << 13) +#define G_028810_PS_UCP_Y_SCALE_NEG(x) (((x) >> 13) & 0x1) +#define C_028810_PS_UCP_Y_SCALE_NEG 0xFFFFDFFF +#define S_028810_PS_UCP_MODE(x) (((x) & 0x3) << 14) +#define G_028810_PS_UCP_MODE(x) (((x) >> 14) & 0x3) +#define C_028810_PS_UCP_MODE 0xFFFF3FFF +#define S_028810_CLIP_DISABLE(x) (((x) & 0x1) << 16) +#define G_028810_CLIP_DISABLE(x) (((x) >> 16) & 0x1) +#define C_028810_CLIP_DISABLE 0xFFFEFFFF +#define S_028810_UCP_CULL_ONLY_ENA(x) (((x) & 0x1) << 17) +#define G_028810_UCP_CULL_ONLY_ENA(x) (((x) >> 17) & 0x1) +#define C_028810_UCP_CULL_ONLY_ENA 0xFFFDFFFF +#define S_028810_BOUNDARY_EDGE_FLAG_ENA(x) (((x) & 0x1) << 18) +#define G_028810_BOUNDARY_EDGE_FLAG_ENA(x) (((x) >> 18) & 0x1) +#define C_028810_BOUNDARY_EDGE_FLAG_ENA 0xFFFBFFFF +#define S_028810_DX_CLIP_SPACE_DEF(x) (((x) & 0x1) << 19) +#define G_028810_DX_CLIP_SPACE_DEF(x) (((x) >> 19) & 0x1) +#define C_028810_DX_CLIP_SPACE_DEF 0xFFF7FFFF +#define S_028810_DIS_CLIP_ERR_DETECT(x) (((x) & 0x1) << 20) +#define G_028810_DIS_CLIP_ERR_DETECT(x) (((x) >> 20) & 0x1) +#define C_028810_DIS_CLIP_ERR_DETECT 0xFFEFFFFF +#define S_028810_VTX_KILL_OR(x) (((x) & 0x1) << 21) +#define G_028810_VTX_KILL_OR(x) (((x) >> 21) & 0x1) +#define C_028810_VTX_KILL_OR 0xFFDFFFFF +#define S_028810_DX_LINEAR_ATTR_CLIP_ENA(x) (((x) & 0x1) << 24) +#define G_028810_DX_LINEAR_ATTR_CLIP_ENA(x) (((x) >> 24) & 0x1) +#define C_028810_DX_LINEAR_ATTR_CLIP_ENA 0xFEFFFFFF +#define S_028810_VTE_VPORT_PROVOKE_DISABLE(x) (((x) & 0x1) << 25) +#define G_028810_VTE_VPORT_PROVOKE_DISABLE(x) (((x) >> 25) & 0x1) +#define C_028810_VTE_VPORT_PROVOKE_DISABLE 0xFDFFFFFF +#define S_028810_ZCLIP_NEAR_DISABLE(x) (((x) & 0x1) << 26) +#define G_028810_ZCLIP_NEAR_DISABLE(x) (((x) >> 26) & 0x1) +#define C_028810_ZCLIP_NEAR_DISABLE 0xFBFFFFFF +#define S_028810_ZCLIP_FAR_DISABLE(x) (((x) & 0x1) << 27) +#define G_028810_ZCLIP_FAR_DISABLE(x) (((x) >> 27) & 0x1) +#define C_028810_ZCLIP_FAR_DISABLE 0xF7FFFFFF #define R_028010_DB_DEPTH_INFO 0x028010 #define S_028010_FORMAT(x) (((x) & 0x7) << 0) #define G_028010_FORMAT(x) (((x) >> 0) & 0x7) -- cgit v1.2.3 From a838cee6bc3a2e144c00f0a5f0a7791cd97037ab Mon Sep 17 00:00:00 2001 From: Maarten Maathuis Date: Fri, 6 Aug 2010 23:56:31 +0200 Subject: nouveau: fix potential NULL-ptr dereference in nouveau_stateobj.h - This can only be triggered when DEBUG_NOUVEAU_STATEOBJ is active. - Also remove a redundant pointer assignment. Reported-by: Roy Spliet Signed-off-by: Maarten Maathuis --- src/gallium/drivers/nouveau/nouveau_stateobj.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h index f5c1c5ca2c..e920cf9f3b 100644 --- a/src/gallium/drivers/nouveau/nouveau_stateobj.h +++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h @@ -151,9 +151,9 @@ so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr, if (so->start_alloc <= so->cur_start) { debug_printf("exceeding num_start size\n"); assert(0); - } else + } #endif /* DEBUG_NOUVEAU_STATEOBJ */ - start = so->start; + start = so->start; #ifdef DEBUG_NOUVEAU_STATEOBJ if (so->cur_start > 0 && start[so->cur_start - 1].size > so->cur) { @@ -162,7 +162,6 @@ so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr, } #endif /* DEBUG_NOUVEAU_STATEOBJ */ - so->start = start; start[so->cur_start].gr = gr; start[so->cur_start].mthd = mthd; start[so->cur_start].size = size; @@ -193,11 +192,10 @@ so_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo, if (so->reloc_alloc <= so->cur_reloc) { debug_printf("exceeding num_reloc size\n"); assert(0); - } else + } #endif /* DEBUG_NOUVEAU_STATEOBJ */ - r = so->reloc; + r = so->reloc; - so->reloc = r; r[so->cur_reloc].bo = NULL; nouveau_bo_ref(bo, &(r[so->cur_reloc].bo)); r[so->cur_reloc].gr = so->start[so->cur_start-1].gr; -- cgit v1.2.3 From ab0a3f30b8070a0204c023f8ea5375f1d3f710fd Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 7 Aug 2010 01:59:31 +0200 Subject: r300g: fix cbzb clears when hyperz is off --- src/gallium/drivers/r300/r300_flush.c | 3 +-- src/gallium/drivers/r300/r300_hyperz.c | 9 +++++++-- src/gallium/drivers/r300/r300_render.c | 3 +-- src/gallium/drivers/r300/r300_state.c | 3 +-- src/gallium/drivers/r300/r300_state_derived.c | 3 +-- 5 files changed, 11 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 7fed9b5d07..fe182b6615 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -44,8 +44,7 @@ static void r300_flush(struct pipe_context* pipe, u_upload_flush(r300->upload_ib); if (r300->dirty_hw) { - if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) - r300_emit_hyperz_end(r300); + r300_emit_hyperz_end(r300); r300_emit_query_end(r300); r300->flush_counter++; diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index 10e440ce30..523d547ea9 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -21,12 +21,14 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "util/u_format.h" -#include "util/u_mm.h" #include "r300_context.h" #include "r300_hyperz.h" #include "r300_reg.h" #include "r300_fs.h" +#include "r300_winsys.h" + +#include "util/u_format.h" +#include "util/u_mm.h" /* HiZ rules - taken from various docs @@ -138,6 +140,9 @@ static void r300_update_hyperz(struct r300_context* r300) return; } + if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + return; + /* Zbuffer compression. */ if (r300->z_compression) { z->zb_bw_cntl |= R300_RD_COMP_ENABLE; diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 910f5f7113..f2ff65b261 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -223,8 +223,7 @@ static void r300_prepare_for_rendering(struct r300_context *r300, /* Emitted in flush. */ end_dwords += 26; /* emit_query_end */ - if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) - end_dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */ + end_dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */ cs_dwords += end_dwords; diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 1e6b81d798..9db5e9e054 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -689,8 +689,7 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, /* What is marked as dirty depends on the enum r300_fb_state_change. */ r300->gpu_flush.dirty = TRUE; r300->fb_state.dirty = TRUE; - if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) - r300->hyperz_state.dirty = TRUE; + r300->hyperz_state.dirty = TRUE; if (change == R300_CHANGED_FB_STATE) { r300->aa_state.dirty = TRUE; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index f3dad4c292..a85b46f5c7 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -694,6 +694,5 @@ void r300_update_derived_state(struct r300_context* r300) } } - if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) - r300_update_hyperz_state(r300); + r300_update_hyperz_state(r300); } -- cgit v1.2.3 From b55f6279370333574aa11c289f2ad9715cdca24d Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 8 Aug 2010 19:12:14 +1000 Subject: r300g: take hiz/zmask offsets into a/c when clearing. Need to add a test for multi-hiz/zmask db in a single context. Signed-off-by: Dave Airlie --- src/gallium/drivers/r300/r300_emit.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 17e180a79a..0c40e2db93 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -1008,6 +1008,8 @@ void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) int i; tex = r300_texture(fb->zsbuf->texture); + + offset = tex->hiz_mem[fb->zsbuf->level]->ofs; stride = tex->desc.stride_in_pixels[fb->zsbuf->level]; /* convert from pixels to 4x4 blocks */ @@ -1043,6 +1045,8 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state tex = r300_texture(fb->zsbuf->texture); stride = tex->desc.stride_in_pixels[fb->zsbuf->level]; + offset = tex->zmask_mem[fb->zsbuf->level]->ofs; + if (r300->z_compression == RV350_Z_COMPRESS_88) mult = 8; else -- cgit v1.2.3 From 363b74f132a5a329fac25322f3c5c227c55b94a8 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 8 Aug 2010 18:21:53 +0200 Subject: r300g: do not allocate a zmask block for 3D textures and cubemaps --- src/gallium/drivers/r300/r300_hyperz.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index 523d547ea9..3b0adc3584 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -338,6 +338,12 @@ void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf tex = r300_texture(surf->base.texture); + /* We currently don't handle decompression for 3D textures and cubemaps + * correctly. */ + if (tex->desc.b.b.target != PIPE_TEXTURE_1D && + tex->desc.b.b.target != PIPE_TEXTURE_2D) + return; + if (tex->zmask_mem[level]) return; -- cgit v1.2.3 From 4f5e51068bce4e32a9561b4b4d6f3feca33642bf Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 8 Aug 2010 18:43:42 +0200 Subject: r300g: flush zmasks of zbuffers we are going to use as samplers It sometimes works, sometimes not. I guess we have the zmask offsets wrong. --- src/gallium/drivers/r300/r300_blit.c | 19 ++++++++------ src/gallium/drivers/r300/r300_context.h | 7 ++++++ src/gallium/drivers/r300/r300_emit.c | 3 +++ src/gallium/drivers/r300/r300_state_derived.c | 36 +++++++++++++++++++++++++++ 4 files changed, 58 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 6f8d9abfc8..18d00d61f4 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -259,27 +259,32 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe, r300_blitter_end(r300); } -/* Clear a region of a depth stencil surface. */ -static void r300_flush_depth_stencil(struct pipe_context *pipe, - struct pipe_resource *dst, - struct pipe_subresource subdst) +/* Flush a depth stencil buffer. */ +void r300_flush_depth_stencil(struct pipe_context *pipe, + struct pipe_resource *dst, + struct pipe_subresource subdst, + unsigned zslice) { struct r300_context *r300 = r300_context(pipe); struct pipe_surface *dstsurf; struct r300_texture *tex = r300_texture(dst); - /* only flush the zmask if we have one attached to this texture */ if (!tex->zmask_mem[subdst.level]) return; + if (!tex->dirty_zmask[subdst.level]) + return; dstsurf = pipe->screen->get_tex_surface(pipe->screen, dst, - subdst.face, subdst.level, 0, + subdst.face, subdst.level, zslice, PIPE_BIND_DEPTH_STENCIL); r300->z_decomp_rd = TRUE; r300_blitter_begin(r300, R300_CLEAR_SURFACE); util_blitter_flush_depth_stencil(r300->blitter, dstsurf); r300_blitter_end(r300); r300->z_decomp_rd = FALSE; + + tex->dirty_zmask[subdst.level] = FALSE; + pipe->flush(pipe, 0, NULL); } /* Copy a block of pixels from one surface to another using HW. */ @@ -342,7 +347,7 @@ static void r300_resource_copy_region(struct pipe_context *pipe, is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; if (is_depth) { - r300_flush_depth_stencil(pipe, src, subsrc); + r300_flush_depth_stencil(pipe, src, subsrc, srcz); } if (old_format != new_format) { dst->format = new_format; diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index d86a5c8fc9..8b772f3887 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -397,6 +397,7 @@ struct r300_texture { /* hyper-z memory allocs */ struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS]; struct mem_block *zmask_mem[R300_MAX_TEXTURE_LEVELS]; + boolean dirty_zmask[R300_MAX_TEXTURE_LEVELS]; /* This is the level tiling flags were last time set for. * It's used to prevent redundant tiling-flags changes from happening.*/ @@ -628,6 +629,12 @@ void r300_init_render_functions(struct r300_context *r300); void r300_init_state_functions(struct r300_context* r300); void r300_init_resource_functions(struct r300_context* r300); +/* r300_blit.c */ +void r300_flush_depth_stencil(struct pipe_context *pipe, + struct pipe_resource *dst, + struct pipe_subresource subdst, + unsigned zslice); + /* r300_query.c */ void r300_resume_query(struct r300_context *r300, struct r300_query *query); diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 0c40e2db93..c35774c149 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -1069,6 +1069,9 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state offset <<= offset_shift; r300_emit_zmask_line_clear(r300, offset, stride, 0x0);//0xffffffff); } + + /* Mark the current zbuffer's zmask as dirty. */ + tex->dirty_zmask[fb->zsbuf->level] = TRUE; } void r300_emit_ztop_state(struct r300_context* r300, diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index a85b46f5c7..693b1e29f2 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -677,8 +677,44 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) } } +/* We can't use compressed zbuffers as samplers. */ +static void r300_flush_depth_textures(struct r300_context *r300) +{ + struct r300_textures_state *state = + (struct r300_textures_state*)r300->textures_state.state; + unsigned i, level; + unsigned count = MIN2(state->sampler_view_count, + state->sampler_state_count); + + if (r300->z_decomp_rd) + return; + + for (i = 0; i < count; i++) + if (state->sampler_views[i] && state->sampler_states[i]) { + struct pipe_resource *tex = state->sampler_views[i]->base.texture; + + if (tex->target == PIPE_TEXTURE_3D || + tex->target == PIPE_TEXTURE_CUBE) + continue; + + /* Ignore non-depth textures. + * Also ignore reinterpreted depth textures, e.g. resource_copy. */ + if (!util_format_is_depth_or_stencil(tex->format)) + continue; + + for (level = 0; level <= tex->last_level; level++) + if (r300_texture(tex)->dirty_zmask[level]) { + /* We don't handle 3D textures and cubemaps yet. */ + r300_flush_depth_stencil(&r300->context, tex, + u_subresource(0, level), 0); + } + } +} + void r300_update_derived_state(struct r300_context* r300) { + r300_flush_depth_textures(r300); + if (r300->textures_state.dirty) { r300_merge_textures_and_samplers(r300); } -- cgit v1.2.3 From aef0fbd5b6e0b29342d09722c98d512b3661c31b Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 8 Aug 2010 23:09:46 +0200 Subject: r300g: remove a flush Ooops, it wasn't supposed to be there. --- src/gallium/drivers/r300/r300_blit.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 18d00d61f4..ff52286b5c 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -284,7 +284,6 @@ void r300_flush_depth_stencil(struct pipe_context *pipe, r300->z_decomp_rd = FALSE; tex->dirty_zmask[subdst.level] = FALSE; - pipe->flush(pipe, 0, NULL); } /* Copy a block of pixels from one surface to another using HW. */ -- cgit v1.2.3 From 3bb0719fe1514d2a8fd4674203882fdb08232172 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Mon, 9 Aug 2010 04:56:03 +0200 Subject: nouveau: fix maps with PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_DONTBLOCK In this case, we were incorrectly prioritizing PIPE_TRANSFER_DONTBLOCK over PIPE_TRANSFER_UNSYNCHRONIZED. This can lead to failure in the Mesa VBO draw paths that end up specifying both, but don't expect map to fail (in particular, the problem manifested as a leak of buffer objects in teapot with other changes). --- src/gallium/drivers/nouveau/nouveau_winsys.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index df79ca89ca..c6c93d40b8 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -24,11 +24,10 @@ nouveau_screen_transfer_flags(unsigned pipe) flags |= NOUVEAU_BO_WR; if (pipe & PIPE_TRANSFER_DISCARD) flags |= NOUVEAU_BO_INVAL; - if (pipe & PIPE_TRANSFER_DONTBLOCK) - flags |= NOUVEAU_BO_NOWAIT; - else if (pipe & PIPE_TRANSFER_UNSYNCHRONIZED) flags |= NOUVEAU_BO_NOSYNC; + else if (pipe & PIPE_TRANSFER_DONTBLOCK) + flags |= NOUVEAU_BO_NOWAIT; return flags; } -- cgit v1.2.3 From 771ad674142001edba1802f82c89c1071cf72ca2 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 9 Aug 2010 19:56:45 +1000 Subject: r300g: fix hiz/zmask offset emissions. ofs is in dwords, so need to shift it for registers. Signed-off-by: Dave Airlie --- src/gallium/drivers/r300/r300_emit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index c35774c149..7bd43b6eb5 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -393,7 +393,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) /* HiZ RAM. */ if (r300->screen->caps.hiz_ram) { if (tex->hiz_mem[level]) { - OUT_CS_REG(R300_ZB_HIZ_OFFSET, tex->hiz_mem[level]->ofs); + OUT_CS_REG(R300_ZB_HIZ_OFFSET, tex->hiz_mem[level]->ofs << 2); OUT_CS_REG(R300_ZB_HIZ_PITCH, surf_pitch); } else { OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); @@ -402,7 +402,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) } /* Z Mask RAM. (compressed zbuffer) */ if (tex->zmask_mem[level]) { - OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs); + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs << 2); OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch); } else { OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); -- cgit v1.2.3 From 00963589b4d92460e3ae2c1557a5d816b5c67a6d Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 9 Aug 2010 14:53:58 +0200 Subject: r600g: fill out some missing caps and sort them The shader caps need additional corrections. (based on a patch from netkas at Phoronix) --- src/gallium/drivers/r600/r600_screen.c | 97 +++++++++++++++++++++++----------- 1 file changed, 65 insertions(+), 32 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_screen.c b/src/gallium/drivers/r600/r600_screen.c index 4b87327a7c..6ec842d591 100644 --- a/src/gallium/drivers/r600/r600_screen.c +++ b/src/gallium/drivers/r600/r600_screen.c @@ -53,59 +53,92 @@ static const char* r600_get_name(struct pipe_screen* pscreen) static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) { switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return 16; + /* Supported features (boolean caps). */ case PIPE_CAP_NPOT_TEXTURES: - return 1; case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; case PIPE_CAP_GLSL: - return 1; case PIPE_CAP_DUAL_SOURCE_BLEND: - return 1; case PIPE_CAP_ANISOTROPIC_FILTER: - return 1; case PIPE_CAP_POINT_SPRITE: - return 1; - case PIPE_CAP_MAX_RENDER_TARGETS: - /* FIXME some r6xx are buggy and can only do 4 */ - return 8; case PIPE_CAP_OCCLUSION_QUERY: - return 1; case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - /* FIXME not sure here */ - return 13; case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - return 1; case PIPE_CAP_TEXTURE_MIRROR_REPEAT: - return 1; - case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - /* FIXME allow this once infrastructure is there */ - return 0; - case PIPE_CAP_TGSI_CONT_SUPPORTED: - return 0; case PIPE_CAP_BLEND_EQUATION_SEPARATE: - return 1; case PIPE_CAP_SM3: - return 1; + case PIPE_CAP_TEXTURE_SWIZZLE: case PIPE_CAP_INDEP_BLEND_ENABLE: - return 1; - case PIPE_CAP_INDEP_BLEND_FUNC: - /* FIXME allow this */ - return 0; case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: return 1; + + /* Unsupported features (boolean caps). */ + case PIPE_CAP_TIMER_QUERY: + case PIPE_CAP_TGSI_CONT_SUPPORTED: + case PIPE_CAP_STREAM_OUTPUT: + case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */ + case PIPE_CAP_GEOMETRY_SHADER4: + case PIPE_CAP_DEPTH_CLAMP: /* FIXME allow this */ + return 0; + + /* Texturing. */ + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 14; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + /* FIXME allow this once infrastructure is there */ + return 0; + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return 16; + + /* Render targets. */ + case PIPE_CAP_MAX_RENDER_TARGETS: + /* FIXME some r6xx are buggy and can only do 4 */ + return 8; + + /* Fragment coordinate conventions. */ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: return 1; case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: return 0; + + /* Shader limits. */ + case PIPE_CAP_MAX_VS_INSTRUCTIONS: + case PIPE_CAP_MAX_VS_ALU_INSTRUCTIONS: + case PIPE_CAP_MAX_VS_TEX_INSTRUCTIONS: + case PIPE_CAP_MAX_VS_TEX_INDIRECTIONS: + case PIPE_CAP_MAX_FS_INSTRUCTIONS: + case PIPE_CAP_MAX_FS_ALU_INSTRUCTIONS: + case PIPE_CAP_MAX_FS_TEX_INSTRUCTIONS: + case PIPE_CAP_MAX_FS_TEX_INDIRECTIONS: + return 8192; + case PIPE_CAP_MAX_VS_CONTROL_FLOW_DEPTH: + case PIPE_CAP_MAX_FS_CONTROL_FLOW_DEPTH: + return 8; /* FIXME */ + case PIPE_CAP_MAX_VS_INPUTS: + case PIPE_CAP_MAX_FS_INPUTS: + return 32; + case PIPE_CAP_MAX_VS_TEMPS: + case PIPE_CAP_MAX_FS_TEMPS: + return 128; + case PIPE_CAP_MAX_VS_ADDRS: + case PIPE_CAP_MAX_FS_ADDRS: + return 1; /* FIXME Isn't this equal to TEMPS? */ + case PIPE_CAP_MAX_VS_CONSTS: + case PIPE_CAP_MAX_FS_CONSTS: + return 256; /* FIXME I believe this should be much higher. */ + case PIPE_CAP_MAX_CONST_BUFFERS: + return 1; + case PIPE_CAP_MAX_CONST_BUFFER_SIZE: /* in bytes */ + return 4096; + case PIPE_CAP_MAX_PREDICATE_REGISTERS: + case PIPE_CAP_MAX_VS_PREDS: + case PIPE_CAP_MAX_FS_PREDS: + return 0; /* FIXME */ + default: R600_ERR("r600: unknown param %d\n", param); return 0; -- cgit v1.2.3 From 2cad5350f9691d4d2c18a637548735925fa0ee97 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 9 Aug 2010 14:57:56 +0200 Subject: r600g: fix some warnings --- src/gallium/drivers/r600/r600_asm.c | 4 ++-- src/gallium/drivers/r600/r600_draw.c | 2 +- src/gallium/drivers/r600/r600_screen.h | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index f1dc3dc3a9..16c98504ad 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -287,7 +287,7 @@ static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsign return 0; } -int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) +static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) { unsigned i; @@ -331,7 +331,7 @@ int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) return 0; } -int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) +static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) { unsigned id = cf->id; diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c index 2420b76318..f058455162 100644 --- a/src/gallium/drivers/r600/r600_draw.c +++ b/src/gallium/drivers/r600/r600_draw.c @@ -127,7 +127,7 @@ static int r600_draw_common(struct r600_draw *draw) draw->draw->states[R600_DRAW__VGT_NUM_INDICES] = draw->count; draw->draw->states[R600_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator; if (draw->index_buffer) { - rbuffer = (struct r600_buffer*)draw->index_buffer; + rbuffer = (struct r600_resource*)draw->index_buffer; draw->draw->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); draw->draw->placement[0] = RADEON_GEM_DOMAIN_GTT; draw->draw->placement[1] = RADEON_GEM_DOMAIN_GTT; diff --git a/src/gallium/drivers/r600/r600_screen.h b/src/gallium/drivers/r600/r600_screen.h index 9a452ecfe3..53b560c617 100644 --- a/src/gallium/drivers/r600/r600_screen.h +++ b/src/gallium/drivers/r600/r600_screen.h @@ -80,4 +80,6 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx, int r600_conv_pipe_format(unsigned pformat, unsigned *format); int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); +void r600_init_screen_texture_functions(struct pipe_screen *screen); + #endif -- cgit v1.2.3 From 29b7d26401a77ee4f772233b003d942bba59bb63 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 9 Aug 2010 17:15:37 +0200 Subject: r300g: do not print shader compiler errors by default --- src/gallium/drivers/r300/r300_fs.c | 4 ++-- src/gallium/drivers/r300/r300_vs.c | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 87ff49a90c..5c905c1159 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -383,7 +383,7 @@ static void r300_translate_fragment_shader( find_output_registers(&compiler, shader); if (compiler.Base.Debug) { - debug_printf("r300: Initial fragment program\n"); + DBG(r300, DBG_FP, "r300: Initial fragment program\n"); tgsi_dump(tokens, 0); } @@ -418,7 +418,7 @@ static void r300_translate_fragment_shader( } if (compiler.Base.Error) { - fprintf(stderr, "r300 FP: Compiler Error:\n%sUsing a dummy shader" + DBG(r300, DBG_FP, "r300 FP: Compiler Error:\n%sUsing a dummy shader" " instead.\nIf there's an 'unknown opcode' message, please" " file a bug report and attach this log.\n", compiler.Base.ErrorMsg); diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index b25c786d6b..54c8de1241 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -207,7 +207,7 @@ void r300_translate_vertex_shader(struct r300_context *r300, compiler.Base.max_temp_regs = 32; if (compiler.Base.Debug) { - debug_printf("r300: Initial vertex program\n"); + DBG(r300, DBG_VP, "r300: Initial vertex program\n"); tgsi_dump(vs->state.tokens, 0); } @@ -227,8 +227,7 @@ void r300_translate_vertex_shader(struct r300_context *r300, /* Invoke the compiler */ r3xx_compile_vertex_program(&compiler); if (compiler.Base.Error) { - /* XXX We should fallback using Draw. */ - fprintf(stderr, "r300 VP: Compiler error:\n%sUsing a dummy shader" + DBG(r300, DBG_VP, "r300 VP: Compiler error:\n%sUsing a dummy shader" " instead.\nIf there's an 'unknown opcode' message, please" " file a bug report and attach this log.\n", compiler.Base.ErrorMsg); -- cgit v1.2.3 From 95fb0bf58dde0b81ce601d3f0477fd1b2a5a28d4 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Mon, 9 Aug 2010 11:32:45 -0400 Subject: r600g: fix r600 context structure, avoid segfault when no scissor Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_context.h | 2 +- src/gallium/drivers/r600/r600_state.c | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index 431f8951b2..c606dbbda3 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -94,7 +94,7 @@ struct r600_context_hw_states { struct radeon_state *dsa; struct radeon_state *blend; struct radeon_state *viewport; - struct radeon_state *cb[7]; + struct radeon_state *cb[8]; struct radeon_state *config; struct radeon_state *cb_cntl; struct radeon_state *db; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index ff621084d4..cad5185e32 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -836,12 +836,25 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) static struct radeon_state *r600_scissor(struct r600_context *rctx) { const struct pipe_scissor_state *state = &rctx->scissor->state.scissor; + const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; struct r600_screen *rscreen = rctx->screen; struct radeon_state *rstate; + unsigned minx, maxx, miny, maxy; u32 tl, br; - tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) | S_028240_WINDOW_OFFSET_DISABLE(1); - br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); + if (state == NULL) { + minx = 0; + miny = 0; + maxx = fb->cbufs[0]->width; + maxy = fb->cbufs[0]->height; + } else { + minx = state->minx; + miny = state->miny; + maxx = state->maxx; + maxy = state->maxy; + } + tl = S_028240_TL_X(minx) | S_028240_TL_Y(miny) | S_028240_WINDOW_OFFSET_DISABLE(1); + br = S_028244_BR_X(maxx) | S_028244_BR_Y(maxy); rstate = radeon_state(rscreen->rw, R600_SCISSOR_TYPE, R600_SCISSOR); if (rstate == NULL) return NULL; -- cgit v1.2.3 From 65b9747a54490dd56cd5cee4c2c1b9f51d35f133 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 9 Aug 2010 16:41:44 +0100 Subject: util: Move _mm_shuffle_epi8() to u_sse.h. It's bound to be useful elsewhere. --- src/gallium/auxiliary/util/u_sse.h | 29 +++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_tile_soa.py | 29 +---------------------------- 2 files changed, 30 insertions(+), 28 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h index 6145e34aa3..87959ab0aa 100644 --- a/src/gallium/auxiliary/util/u_sse.h +++ b/src/gallium/auxiliary/util/u_sse.h @@ -71,6 +71,35 @@ _mm_castps_si128(__m128 a) #endif /* defined(_MSC_VER) && _MSC_VER < 1500 */ + +#if defined(PIPE_ARCH_SSSE3) + +#include + +#else /* !PIPE_ARCH_SSSE3 */ + +#include + +/** + * Describe _mm_shuffle_epi8() with gcc extended inline assembly, for cases + * where -mssse3 is not supported/enabled. + * + * MSVC will never get in here as its intrinsics support do not rely on + * compiler command line options. + */ +static __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shuffle_epi8(__m128i a, __m128i mask) +{ + __m128i result; + __asm__("pshufb %1, %0" + : "=x" (result) + : "xm" (mask), "0" (a)); + return result; +} + +#endif /* !PIPE_ARCH_SSSE3 */ + + #endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */ #endif /* U_SSE_H_ */ diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index c71ec8066c..2ba39052ab 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -293,34 +293,7 @@ def generate_ssse3(): print ''' #if defined(PIPE_ARCH_SSE) - -#if defined(PIPE_ARCH_SSSE3) - -#include - -#else - -#include - -/** - * Describe _mm_shuffle_epi8() with gcc extended inline assembly, for cases - * where -mssse3 is not supported/enabled. - * - * MSVC will never get in here as its intrinsics support do not rely on - * compiler command line options. - */ -static __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_shuffle_epi8(__m128i a, __m128i mask) -{ - __m128i result; - __asm__("pshufb %1, %0" - : "=x" (result) - : "xm" (mask), "0" (a)); - return result; -} - -#endif - +#include "util/u_sse.h" static void lp_tile_b8g8r8a8_unorm_swizzle_4ub_ssse3(uint8_t *dst, -- cgit v1.2.3 From e9f3994b16beabb5892abdc4b359a093cbde7f79 Mon Sep 17 00:00:00 2001 From: nobled Date: Fri, 6 Aug 2010 17:32:29 +0000 Subject: llvmpipe: Always use floating-point operators for floating-point types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See: http://bugs.freedesktop.org/29404 http://bugs.freedesktop.org/29407 Signed-off-by: José Fonseca --- src/gallium/drivers/llvmpipe/lp_bld_interp.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index 78744da500..2cf6f38c4b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -141,7 +141,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld, else { dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), ""); dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), ""); - dadxy = LLVMBuildAdd(builder, dadx, dady, ""); + dadxy = LLVMBuildFAdd(builder, dadx, dady, ""); attrib_name(dadx, attrib, chan, ".dadx"); attrib_name(dady, attrib, chan, ".dady"); attrib_name(dadxy, attrib, chan, ".dadxy"); @@ -177,7 +177,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld, * dadq2 = 2 * dq */ - dadq2 = LLVMBuildAdd(builder, dadq, dadq, ""); + dadq2 = LLVMBuildFAdd(builder, dadq, dadq, ""); /* * a = a0 + x * dadx + y * dady @@ -193,12 +193,11 @@ coeffs_init(struct lp_build_interp_soa_context *bld, a = a0; if (interp != LP_INTERP_CONSTANT && interp != LP_INTERP_FACING) { - a = LLVMBuildAdd(builder, a, - LLVMBuildMul(builder, bld->x, dadx, ""), - ""); - a = LLVMBuildAdd(builder, a, - LLVMBuildMul(builder, bld->y, dady, ""), - ""); + LLVMValueRef tmp; + tmp = LLVMBuildFMul(builder, bld->x, dadx, ""); + a = LLVMBuildFAdd(builder, a, tmp, ""); + tmp = LLVMBuildFMul(builder, bld->y, dady, ""); + a = LLVMBuildFAdd(builder, a, tmp, ""); } } @@ -212,7 +211,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld, * Compute the attrib values on the upper-left corner of each quad. */ - a = LLVMBuildAdd(builder, a, dadq2, ""); + a = LLVMBuildFAdd(builder, a, dadq2, ""); /* * a *= 1 / w -- cgit v1.2.3 From 72f8edfc0bb8613ac7c0decfd4199e83c8d8a737 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Tue, 10 Aug 2010 11:52:00 -0400 Subject: r600g: avoid reemiting literal, avoid scheduling empty cs Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_asm.c | 3 ++- src/gallium/drivers/r600/r600_asm.h | 1 + src/gallium/drivers/r600/r600_context.c | 5 ++++- src/gallium/drivers/r600/radeon.h | 31 +++++++++++++++++++++++++++++++ src/gallium/winsys/r600/drm/radeon_ctx.c | 2 ++ src/gallium/winsys/r600/drm/radeon_priv.h | 30 ------------------------------ 6 files changed, 40 insertions(+), 32 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 16c98504ad..ae818bf19b 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -179,12 +179,13 @@ int r600_bc_add_literal(struct r600_bc *bc, const u32 *value) return -EINVAL; } alu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list); - if (!alu->last || !alu->nliteral) { + if (!alu->last || !alu->nliteral || alu->literal_added) { return 0; } memcpy(alu->value, value, 4 * 4); bc->cf_last->ndw += alu->nliteral; bc->ndw += alu->nliteral; + alu->literal_added = 1; return 0; } diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 3fd94dbda0..10d98afaf0 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -48,6 +48,7 @@ struct r600_bc_alu { unsigned last; unsigned is_op3; unsigned nliteral; + unsigned literal_added; u32 value[4]; }; diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index 052eb1cd6d..edde80c660 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -54,15 +54,18 @@ void r600_flush(struct pipe_context *ctx, unsigned flags, /* FIXME dumping should be removed once shader support instructions * without throwing bad code */ + if (!rctx->ctx->cpm4) + goto out; sprintf(dname, "gallium-%08d.bof", dc); if (dc < 1) radeon_ctx_dump_bof(rctx->ctx, dname); #if 1 radeon_ctx_submit(rctx->ctx); #endif + dc++; +out: rctx->ctx = radeon_ctx_decref(rctx->ctx); rctx->ctx = radeon_ctx(rscreen->rw); - dc++; } static void r600_init_config(struct r600_context *rctx) diff --git a/src/gallium/drivers/r600/radeon.h b/src/gallium/drivers/r600/radeon.h index 00cff41b4f..8f00a4895a 100644 --- a/src/gallium/drivers/r600/radeon.h +++ b/src/gallium/drivers/r600/radeon.h @@ -156,6 +156,37 @@ int radeon_ctx_pm4(struct radeon_ctx *ctx); int radeon_ctx_submit(struct radeon_ctx *ctx); void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file); +/* + * radeon context functions + */ +#pragma pack(1) +struct radeon_cs_reloc { + uint32_t handle; + uint32_t read_domain; + uint32_t write_domain; + uint32_t flags; +}; +#pragma pack() + +struct radeon_ctx { + int refcount; + struct radeon *radeon; + u32 *pm4; + u32 cpm4; + u32 draw_cpm4; + unsigned id; + unsigned next_id; + unsigned nreloc; + struct radeon_cs_reloc *reloc; + unsigned nbo; + struct radeon_bo **bo; + unsigned ndraw; + struct radeon_draw *cdraw; + struct radeon_draw **draw; + unsigned nstate; + struct radeon_state **state; +}; + /* * R600/R700 */ diff --git a/src/gallium/winsys/r600/drm/radeon_ctx.c b/src/gallium/winsys/r600/drm/radeon_ctx.c index 6b0eba0b28..ff70ce6de7 100644 --- a/src/gallium/winsys/r600/drm/radeon_ctx.c +++ b/src/gallium/winsys/r600/drm/radeon_ctx.c @@ -151,6 +151,8 @@ int radeon_ctx_submit(struct radeon_ctx *ctx) uint64_t chunk_array[2]; int r = 0; + if (!ctx->cpm4) + return 0; #if 0 for (r = 0; r < ctx->cpm4; r++) { fprintf(stderr, "0x%08X\n", ctx->pm4[r]); diff --git a/src/gallium/winsys/r600/drm/radeon_priv.h b/src/gallium/winsys/r600/drm/radeon_priv.h index b91421f438..96c0d060f7 100644 --- a/src/gallium/winsys/r600/drm/radeon_priv.h +++ b/src/gallium/winsys/r600/drm/radeon_priv.h @@ -68,36 +68,6 @@ extern int radeon_is_family_compatible(unsigned family1, unsigned family2); extern int radeon_reg_id(struct radeon *radeon, unsigned offset, unsigned *typeid, unsigned *stateid, unsigned *id); extern unsigned radeon_type_from_id(struct radeon *radeon, unsigned id); -/* - * radeon context functions - */ -#pragma pack(1) -struct radeon_cs_reloc { - uint32_t handle; - uint32_t read_domain; - uint32_t write_domain; - uint32_t flags; -}; -#pragma pack() - -struct radeon_ctx { - int refcount; - struct radeon *radeon; - u32 *pm4; - u32 cpm4; - u32 draw_cpm4; - unsigned id; - unsigned next_id; - unsigned nreloc; - struct radeon_cs_reloc *reloc; - unsigned nbo; - struct radeon_bo **bo; - unsigned ndraw; - struct radeon_draw *cdraw; - struct radeon_draw **draw; - unsigned nstate; - struct radeon_state **state; -}; int radeon_ctx_set_bo_new(struct radeon_ctx *ctx, struct radeon_bo *bo); struct radeon_bo *radeon_ctx_get_bo(struct radeon_ctx *ctx, unsigned reloc); -- cgit v1.2.3 From c298bab60ea63882f34825a35cbc60f662783e64 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 5 Aug 2010 10:19:00 -0700 Subject: r300/compiler: Implement hardware assisted loops for vertex shaders. Single loops work, but nested loops do not. --- src/gallium/drivers/r300/r300_emit.c | 16 +++ src/gallium/drivers/r300/r300_reg.h | 21 ++++ src/gallium/drivers/r300/r300_state.c | 4 +- src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 2 +- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 115 ++++++++++++++++++--- .../drivers/dri/r300/compiler/r3xx_vertprog_dump.c | 21 +++- src/mesa/drivers/dri/r300/compiler/radeon_code.h | 19 +++- .../drivers/dri/r300/compiler/radeon_compiler.h | 1 + .../dri/r300/compiler/radeon_emulate_loops.c | 8 +- .../dri/r300/compiler/radeon_emulate_loops.h | 3 +- src/mesa/drivers/dri/r300/r300_reg.h | 21 ++++ 11 files changed, 210 insertions(+), 21 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 7bd43b6eb5..98958d1a2e 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -936,6 +936,22 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_TABLE(data, 4); } } + + /* Emit flow control instructions. */ + if (code->num_fc_ops) { + + OUT_CS_REG(R300_VAP_PVS_FLOW_CNTL_OPC, code->fc_ops); + if (r300screen->caps.is_r500) { + OUT_CS_REG_SEQ(R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0, code->num_fc_ops * 2); + OUT_CS_TABLE(code->fc_op_addrs.r500, code->num_fc_ops * 2); + } else { + OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_ADDRS_0, code->num_fc_ops); + OUT_CS_TABLE(code->fc_op_addrs.r300, code->num_fc_ops); + } + OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0, code->num_fc_ops); + OUT_CS_TABLE(code->fc_loop_index, code->num_fc_ops); + } + END_CS; } diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 99a9d65055..60d3b600cb 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -496,6 +496,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228 #define R300_VAP_GB_HORZ_DISC_ADJ 0x222c +#define R300_VAP_PVS_FLOW_CNTL_ADDRS_0 0x2230 +#define R300_PVS_FC_ACT_ADRS(x) ((x) << 0) +#define R300_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 8) +#define R300_PVS_FC_LAST_INST(x) ((x) << 16) +#define R300_PVS_FC_RTN_INST(x) ((x) << 24) + /* gap */ /* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between @@ -514,6 +520,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_2288_R300 0x00750000 /* -- nh */ # define R300_2288_RV350 0x0000FFFF /* -- Vladimir */ +#define R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0 0x2290 +#define R300_PVS_FC_LOOP_INIT_VAL(x) ((x) << 0) +#define R300_PVS_FC_LOOP_STEP_VAL(x) ((x) << 8) + /* gap */ /* Addresses are relative to the vertex program instruction area of the @@ -548,6 +558,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_VAP_PVS_CODE_CNTL_1 0x22D8 # define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0 #define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC +#define R300_VAP_PVS_FC_OPC_JUMP(x) (1 << (2 * (x))) +#define R300_VAP_PVS_FC_OPC_LOOP(x) (2 << (2 * (x))) +#define R300_VAP_PVS_FC_OPC_JSR(x) (3 << (2 * (x))) /* The entire range from 0x2300 to 0x2AC inclusive seems to be used for * immediate vertices @@ -564,6 +577,14 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* write 0 to indicate end of packet? */ #define R300_VAP_VTX_END_OF_PKT 0x24AC +#define R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0 0x2500 +#define R500_PVS_FC_ACT_ADRS(x) ((x) << 0) +#define R500_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 16) + +#define R500_VAP_PVS_FLOW_CNTL_ADDRS_UW_0 0x2504 +#define R500_PVS_FC_LAST_INST(x) ((x) << 0) +#define R500_PVS_FC_RTN_INST(x) ((x) << 16) + /* gap */ /* These are values from r300_reg/r300_reg.h - they are known to be correct diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 9db5e9e054..e62a33daeb 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1758,10 +1758,12 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */ if (r300->screen->caps.has_tcl) { + unsigned fc_op_dwords = r300->screen->caps.is_r500 ? 3 : 2; r300->vs_state.dirty = TRUE; r300->vs_state.size = vs->code.length + 9 + - (vs->immediates_count ? vs->immediates_count * 4 + 3 : 0); + (vs->immediates_count ? vs->immediates_count * 4 + 3 : 0) + + (vs->code.num_fc_ops ? vs->code.num_fc_ops * fc_op_dwords + 4 : 0); if (vs->externals_count) { r300->vs_constants.dirty = TRUE; diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index c6246a81a2..d2fa816894 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -113,7 +113,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) debug_program_log(c, "after unroll loops"); } else{ - rc_transform_loops(&c->Base, &loop_state); + rc_transform_loops(&c->Base, &loop_state, -1); debug_program_log(c, "after transform loops"); rc_emulate_branches(&c->Base); diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index e940fedec2..7c2ba2fc09 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -32,6 +32,11 @@ #include "radeon_emulate_branches.h" #include "radeon_emulate_loops.h" +struct loop { + int BgnLoop; + +}; + /* * Take an already-setup and valid source then swizzle it appropriately to * obtain a constant ZERO or ONE source. @@ -337,6 +342,10 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi { struct rc_instruction *rci; + struct loop * loops; + int current_loop_depth = 0; + int loops_reserved = 0; + compiler->code->pos_end = 0; /* Not supported yet */ compiler->code->length = 0; @@ -385,6 +394,68 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break; case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break; + case RC_OPCODE_BGNLOOP: + { + struct loop * l; + + if ((!compiler->Base.is_r500 + && loops_reserved >= R300_VS_MAX_LOOP_DEPTH) + || loops_reserved >= R500_VS_MAX_FC_DEPTH) { + rc_error(&compiler->Base, + "Loops are nested too deep."); + return; + } + memory_pool_array_reserve(&compiler->Base.Pool, + struct loop, loops, current_loop_depth, + loops_reserved, 1); + l = &loops[current_loop_depth++]; + memset(l , 0, sizeof(struct loop)); + l->BgnLoop = (compiler->code->length / 4); + continue; + } + case RC_OPCODE_ENDLOOP: + { + struct loop * l = &loops[current_loop_depth - 1]; + unsigned int act_addr = l->BgnLoop - 1; + unsigned int last_addr = (compiler->code->length / 4) - 1; + unsigned int ret_addr = l->BgnLoop; + + if (loops_reserved >= R300_VS_MAX_FC_OPS) { + rc_error(&compiler->Base, + "Too many flow control instructions."); + return; + } + if (compiler->Base.is_r500) { + compiler->code->fc_op_addrs.r500 + [compiler->code->num_fc_ops].lw = + R500_PVS_FC_ACT_ADRS(act_addr) + | R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff) + ; + compiler->code->fc_op_addrs.r500 + [compiler->code->num_fc_ops].uw = + R500_PVS_FC_LAST_INST(last_addr) + | R500_PVS_FC_RTN_INST(ret_addr) + ; + } else { + compiler->code->fc_op_addrs.r300 + [compiler->code->num_fc_ops] = + R300_PVS_FC_ACT_ADRS(act_addr) + | R300_PVS_FC_LOOP_CNT_JMP_INST(0xff) + | R300_PVS_FC_LAST_INST(last_addr) + | R300_PVS_FC_RTN_INST(ret_addr) + ; + } + compiler->code->fc_loop_index[compiler->code->num_fc_ops] = + R300_PVS_FC_LOOP_INIT_VAL(0x0) + | R300_PVS_FC_LOOP_STEP_VAL(0x1) + ; + compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP( + compiler->code->num_fc_ops); + compiler->code->num_fc_ops++; + current_loop_depth--; + continue; + } + default: rc_error(&compiler->Base, "Unknown opcode %s\n", rc_get_opcode_info(vpi->Opcode)->Name); return; @@ -406,6 +477,7 @@ struct temporary_allocation { static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler) { struct rc_instruction *inst; + struct rc_instruction *end_loop = NULL; unsigned int num_orig_temps = 0; char hwtemps[R300_VS_MAX_TEMPS]; struct temporary_allocation * ta; @@ -440,10 +512,35 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c /* Pass 2: Determine original temporary lifetimes */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + /* Instructions inside of loops need to use the ENDLOOP + * instruction as their LastRead. */ + if (!end_loop && inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { + int endloops = 1; + struct rc_instruction * ptr; + for(ptr = inst->Next; + ptr != &compiler->Base.Program.Instructions; + ptr = ptr->Next){ + if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) { + endloops++; + } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) { + endloops--; + if (endloops <= 0) { + end_loop = ptr; + break; + } + } + } + } + + if (inst == end_loop) { + end_loop = NULL; + continue; + } for (i = 0; i < opcode->NumSrcRegs; ++i) { if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) - ta[inst->U.I.SrcReg[i].Index].LastRead = inst; + ta[inst->U.I.SrcReg[i].Index].LastRead = + end_loop ? end_loop : inst; } } @@ -640,17 +737,11 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) debug_program_log(compiler, "before compilation"); - /* XXX Ideally this should be done only for r3xx, but since - * we don't have branching support for r5xx, we use the emulation - * on all chipsets. */ + if (compiler->Base.is_r500) + rc_transform_loops(&compiler->Base, &loop_state, R500_VS_MAX_ALU); + else + rc_transform_loops(&compiler->Base, &loop_state, R300_VS_MAX_ALU); - if (compiler->Base.is_r500){ - rc_transform_loops(&compiler->Base, &loop_state); - rc_emulate_loops(&loop_state, R500_VS_MAX_ALU); - } else { - rc_transform_loops(&compiler->Base, &loop_state); - rc_emulate_loops(&loop_state, R300_VS_MAX_ALU); - } debug_program_log(compiler, "after emulate loops"); rc_emulate_branches(&compiler->Base); @@ -717,6 +808,6 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) if (compiler->Base.Debug) { fprintf(stderr, "Final vertex program code:\n"); - r300_vertex_program_dump(compiler->code); + r300_vertex_program_dump(compiler); } } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c index 5800f1a78e..66e352d05d 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c @@ -20,6 +20,7 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "radeon_compiler.h" #include "radeon_code.h" #include @@ -160,8 +161,9 @@ static void r300_vs_src_dump(uint32_t src) r300_vs_swiz_debug[(src >> 22) & 0x7]); } -void r300_vertex_program_dump(struct r300_vertex_program_code * vs) +void r300_vertex_program_dump(struct r300_vertex_program_compiler * c) { + struct r300_vertex_program_code * vs = c->code; unsigned instrcount = vs->length / 4; unsigned i; @@ -177,4 +179,21 @@ void r300_vertex_program_dump(struct r300_vertex_program_code * vs) r300_vs_src_dump(vs->body.d[offset+1+src]); } } + + fprintf(stderr, "Flow Control Ops: 0x%08x\n",vs->fc_ops); + for(i = 0; i < vs->num_fc_ops; i++) { + switch((vs->fc_ops >> (i * 2)) & 0x3 ) { + case 0: fprintf(stderr, "NOP"); break; + case 1: fprintf(stderr, "JUMP"); break; + case 2: fprintf(stderr, "LOOP"); break; + case 3: fprintf(stderr, "JSR"); break; + } + if (c->Base.is_r500) { + fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x\n", + vs->fc_op_addrs.r500[i].uw, + vs->fc_op_addrs.r500[i].lw); + } else { + fprintf(stderr,": 0x%08x\n", vs->fc_op_addrs.r300[i]); + } + } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index e14a3520dd..896246d203 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -243,6 +243,12 @@ struct rX00_fragment_program_code { #define R500_VS_MAX_ALU 1024 #define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4) #define R300_VS_MAX_TEMPS 32 +/* This is the max for all chipsets (r300-r500) */ +#define R300_VS_MAX_FC_OPS 16 +/* The r500 maximum depth is not just for loops, but any combination of loops + * and subroutine jumps. */ +#define R500_VS_MAX_FC_DEPTH 8 +#define R300_VS_MAX_LOOP_DEPTH 1 #define VSF_MAX_INPUTS 32 #define VSF_MAX_OUTPUTS 32 @@ -263,9 +269,18 @@ struct r300_vertex_program_code { uint32_t InputsRead; uint32_t OutputsWritten; -}; -void r300_vertex_program_dump(struct r300_vertex_program_code * vs); + unsigned int num_fc_ops; + uint32_t fc_ops; + union { + uint32_t r300[R300_VS_MAX_FC_OPS]; + struct { + uint32_t lw; + uint32_t uw; + } r500[R300_VS_MAX_FC_OPS]; + } fc_op_addrs; + int32_t fc_loop_index[R300_VS_MAX_FC_OPS]; +}; #endif /* RADEON_CODE_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index f15905d79d..bbd57cca63 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -113,5 +113,6 @@ struct r300_vertex_program_compiler { }; void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c); +void r300_vertex_program_dump(struct r300_vertex_program_compiler * c); #endif /* RADEON_COMPILER_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c index 2a3306f906..32d4b45dd6 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c @@ -423,7 +423,8 @@ static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop, * @param inst A pointer to a BGNLOOP instruction. * @return 1 for success, 0 for failure */ -int transform_loop(struct emulate_loop_state * s, struct rc_instruction * inst) +static int transform_loop(struct emulate_loop_state * s, + struct rc_instruction * inst) { struct loop_info * loop; @@ -435,7 +436,7 @@ int transform_loop(struct emulate_loop_state * s, struct rc_instruction * inst) if (!build_loop_info(s->C, loop, inst)) return 0; - if(try_unroll_loop(s->C, loop, -1)){ + if(try_unroll_loop(s->C, loop, s->prog_inst_limit)){ return 1; } @@ -472,12 +473,13 @@ int transform_loop(struct emulate_loop_state * s, struct rc_instruction * inst) } void rc_transform_loops(struct radeon_compiler *c, - struct emulate_loop_state * s) + struct emulate_loop_state * s, int prog_inst_limit) { struct rc_instruction * ptr; memset(s, 0, sizeof(struct emulate_loop_state)); s->C = c; + s->prog_inst_limit = prog_inst_limit; for(ptr = s->C->Program.Instructions.Next; ptr != &s->C->Program.Instructions; ptr = ptr->Next) { if(ptr->Type == RC_INSTRUCTION_NORMAL && diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h index 86d91ef14b..bba1f68e30 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h @@ -21,10 +21,11 @@ struct emulate_loop_state { struct loop_info * Loops; unsigned int LoopCount; unsigned int LoopReserved; + int prog_inst_limit; }; void rc_transform_loops(struct radeon_compiler *c, - struct emulate_loop_state * s); + struct emulate_loop_state * s, int prog_inst_limit); void rc_unroll_loops(struct radeon_compiler * c, int prog_inst_limit); diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index f25264b6f2..f7705b0f6f 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -441,6 +441,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228 #define R300_VAP_GB_HORZ_DISC_ADJ 0x222c +#define R300_VAP_PVS_FLOW_CNTL_ADDRS_0 0x2230 +#define R300_PVS_FC_ACT_ADRS(x) ((x) << 0) +#define R300_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 8) +#define R300_PVS_FC_LAST_INST(x) ((x) << 16) +#define R300_PVS_FC_RTN_INST(x) ((x) << 24) + /* gap */ /* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between @@ -459,6 +465,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_2288_R300 0x00750000 /* -- nh */ # define R300_2288_RV350 0x0000FFFF /* -- Vladimir */ +#define R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0 0x2290 +#define R300_PVS_FC_LOOP_INIT_VAL(x) ((x) << 0) +#define R300_PVS_FC_LOOP_STEP_VAL(x) ((x) << 8) + /* gap */ /* Addresses are relative to the vertex program instruction area of the @@ -489,6 +499,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_VAP_PVS_CODE_CNTL_1 0x22D8 # define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0 #define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC +#define R300_VAP_PVS_FC_OPC_JUMP(x) (1 << (2 * (x))) +#define R300_VAP_PVS_FC_OPC_LOOP(x) (2 << (2 * (x))) +#define R300_VAP_PVS_FC_OPC_JSR(x) (3 << (2 * (x))) /* The entire range from 0x2300 to 0x2AC inclusive seems to be used for * immediate vertices @@ -505,6 +518,14 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* write 0 to indicate end of packet? */ #define R300_VAP_VTX_END_OF_PKT 0x24AC +#define R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0 0x2500 +#define R500_PVS_FC_ACT_ADRS(x) ((x) << 0) +#define R500_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 16) + +#define R500_VAP_PVS_FLOW_CNTL_ADDRS_UW_0 0x2504 +#define R500_PVS_FC_LAST_INST(x) ((x) << 0) +#define R500_PVS_FC_RTN_INST(x) ((x) << 16) + /* gap */ /* These are values from r300_reg/r300_reg.h - they are known to be correct -- cgit v1.2.3 From 34cdad62dd26ce2e2d59df1c4b82bb301894b762 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Tue, 10 Aug 2010 17:39:13 -0700 Subject: r300g: Remove unnecessary header. --- src/gallium/drivers/r300/r300_state_derived.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 693b1e29f2..1f36e7758f 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -35,7 +35,6 @@ #include "r300_state_inlines.h" #include "r300_texture.h" #include "r300_vs.h" -#include "r300_winsys.h" /* r300_state_derived: Various bits of state which are dependent upon * currently bound CSO data. */ -- cgit v1.2.3 From 683ef52e19576f6e1263bc7d25fc9475c519eade Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 10 Aug 2010 14:21:05 +0200 Subject: r300g: implement gl_FrontFacing --- src/gallium/drivers/r300/r300_fs.c | 15 +++++++- src/gallium/drivers/r300/r300_shader_semantics.h | 2 + src/gallium/drivers/r300/r300_state_derived.c | 43 +++++++++++++++++++--- .../drivers/dri/r300/compiler/radeon_compiler.c | 43 ++++++++++++++++++++++ .../drivers/dri/r300/compiler/radeon_compiler.h | 1 + 5 files changed, 97 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 5c905c1159..2a0c30620a 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -72,6 +72,11 @@ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, fs_inputs->wpos = i; break; + case TGSI_SEMANTIC_FACE: + assert(index == 0); + fs_inputs->face = i; + break; + default: fprintf(stderr, "r300: FP: Unknown input semantic: %i\n", info->input_semantic_name[i]); @@ -120,6 +125,9 @@ static void allocate_hardware_inputs( allocate(mydata, inputs->color[i], reg++); } } + if (inputs->face != ATTR_UNUSED) { + allocate(mydata, inputs->face, reg++); + } for (i = 0; i < ATTR_GENERIC_COUNT; i++) { if (inputs->generic[i] != ATTR_UNUSED) { allocate(mydata, inputs->generic[i], reg++); @@ -360,13 +368,14 @@ static void r300_translate_fragment_shader( { struct r300_fragment_program_compiler compiler; struct tgsi_to_rc ttr; - int wpos; + int wpos, face; unsigned i; tgsi_scan_shader(tokens, &shader->info); r300_shader_read_fs_inputs(&shader->info, &shader->inputs); wpos = shader->inputs.wpos; + face = shader->inputs.face; /* Setup the compiler. */ memset(&compiler, 0, sizeof(compiler)); @@ -406,6 +415,10 @@ static void r300_translate_fragment_shader( rc_transform_fragment_wpos(&compiler.Base, wpos, wpos, TRUE); } + if (face != ATTR_UNUSED) { + rc_transform_fragment_face(&compiler.Base, face); + } + /* Invoke the compiler */ r3xx_compile_fragment_program(&compiler); diff --git a/src/gallium/drivers/r300/r300_shader_semantics.h b/src/gallium/drivers/r300/r300_shader_semantics.h index cb7a37033f..4be23e64ce 100644 --- a/src/gallium/drivers/r300/r300_shader_semantics.h +++ b/src/gallium/drivers/r300/r300_shader_semantics.h @@ -38,6 +38,7 @@ struct r300_shader_semantics { int psize; int color[ATTR_COLOR_COUNT]; int bcolor[ATTR_COLOR_COUNT]; + int face; int generic[ATTR_GENERIC_COUNT]; int fog; int wpos; @@ -50,6 +51,7 @@ static INLINE void r300_shader_semantics_reset( info->pos = ATTR_UNUSED; info->psize = ATTR_UNUSED; + info->face = ATTR_UNUSED; info->fog = ATTR_UNUSED; info->wpos = ATTR_UNUSED; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 1f36e7758f..39000477cb 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -46,6 +46,11 @@ enum r300_rs_swizzle { SWIZ_0001, }; +enum r300_rs_col_write_type { + WRITE_COLOR = 0, + WRITE_FACE +}; + static void r300_draw_emit_attrib(struct r300_context* r300, enum attrib_emit emit, enum interp_mode interp, @@ -203,8 +208,10 @@ static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr, rs->inst[id] |= R300_RS_INST_COL_ID(id); } -static void r300_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset) +static void r300_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset, + enum r300_rs_col_write_type type) { + assert(type != WRITE_COLOR); rs->inst[id] |= R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_offset); } @@ -252,10 +259,16 @@ static void r500_rs_col(struct r300_rs_block* rs, int id, int ptr, rs->inst[id] |= R500_RS_INST_COL_ID(id); } -static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset) +static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset, + enum r300_rs_col_write_type type) { - rs->inst[id] |= R500_RS_INST_COL_CN_WRITE | - R500_RS_INST_COL_ADDR(fp_offset); + if (type == WRITE_FACE) + rs->inst[id] |= R500_RS_INST_COL_CN_WRITE_BACKFACE | + R500_RS_INST_COL_ADDR(fp_offset); + else + rs->inst[id] |= R500_RS_INST_COL_CN_WRITE | + R500_RS_INST_COL_ADDR(fp_offset); + } static void r500_rs_tex(struct r300_rs_block* rs, int id, int ptr, @@ -305,7 +318,7 @@ static void r300_update_rs_block(struct r300_context *r300) struct r300_rs_block rs = {0}; int i, col_count = 0, tex_count = 0, fp_offset = 0, count, loc = 0, tex_ptr = 0; void (*rX00_rs_col)(struct r300_rs_block*, int, int, enum r300_rs_swizzle); - void (*rX00_rs_col_write)(struct r300_rs_block*, int, int); + void (*rX00_rs_col_write)(struct r300_rs_block*, int, int, enum r300_rs_col_write_type); void (*rX00_rs_tex)(struct r300_rs_block*, int, int, enum r300_rs_swizzle); void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int); boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || @@ -350,7 +363,7 @@ static void r300_update_rs_block(struct r300_context *r300) /* Write it to the FS input register if it's needed by the FS. */ if (fs_inputs->color[i] != ATTR_UNUSED) { - rX00_rs_col_write(&rs, col_count, fp_offset); + rX00_rs_col_write(&rs, col_count, fp_offset, WRITE_COLOR); fp_offset++; DBG(r300, DBG_RS, @@ -398,6 +411,24 @@ static void r300_update_rs_block(struct r300_context *r300) } } + /* gl_FrontFacing. + * Note that we can use either the two-sided color selection based on + * the front and back vertex shader colors, or gl_FrontFacing, + * but not both! It locks up otherwise. + * + * In Direct3D 9, the two-sided color selection can be used + * with shaders 2.0 only, while gl_FrontFacing can be used + * with shaders 3.0 only. The hardware apparently hasn't been designed + * to support both at the same time. */ + if (r300->screen->caps.is_r500 && fs_inputs->face != ATTR_UNUSED && + !(any_bcolor_used && r300->two_sided_color)) { + rX00_rs_col(&rs, col_count, col_count, SWIZ_XYZW); + rX00_rs_col_write(&rs, col_count, fp_offset, WRITE_FACE); + fp_offset++; + col_count++; + DBG(r300, DBG_RS, "r300: Rasterized FACE written to FS.\n"); + } + /* Rasterize texture coordinates. */ for (i = 0; i < ATTR_GENERIC_COUNT && tex_count < 8; i++) { bool sprite_coord = !!(r300->sprite_coord_enable & (1 << i)); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c index 1c8ba864a4..935dc9b0a8 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -307,3 +307,46 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig } } + +/** + * The FACE input in hardware contains 1 if it's a back face, 0 otherwise. + * Gallium and OpenGL define it the other way around. + * + * So let's just negate FACE at the beginning of the shader and rewrite the rest + * of the shader to read from the newly allocated temporary. + */ +void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face) +{ + unsigned tempregi = rc_find_free_temporary(c); + struct rc_instruction *inst_add; + struct rc_instruction *inst; + + /* perspective divide */ + inst_add = rc_insert_new_instruction(c, &c->Program.Instructions); + inst_add->U.I.Opcode = RC_OPCODE_ADD; + + inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_add->U.I.DstReg.Index = tempregi; + inst_add->U.I.DstReg.WriteMask = RC_MASK_X; + + inst_add->U.I.SrcReg[0].File = RC_FILE_NONE; + inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; + + inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT; + inst_add->U.I.SrcReg[1].Index = face; + inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; + inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW; + + for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + for(i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && + inst->U.I.SrcReg[i].Index == face) { + inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[i].Index = tempregi; + } + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index e15291dd19..7c42eb3ae5 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -81,6 +81,7 @@ void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_ou void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output); void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, int full_vtransform); +void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face); struct r300_fragment_program_compiler { struct radeon_compiler Base; -- cgit v1.2.3 From ca5227ce8b749fa3f00438c41066def6e0a8dbe4 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 11 Aug 2010 02:58:50 +0200 Subject: r300g: initialize VAP_VTX_STATE_CNTL This got lost during the rasterizer rewrite. --- src/gallium/drivers/r300/r300_state_derived.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 39000477cb..c047a127ba 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -337,6 +337,11 @@ static void r300_update_rs_block(struct r300_context *r300) rX00_rs_tex_write = r300_rs_tex_write; } + /* 0x5555 copied from classic, which means: + * Select user color 0 for COLOR0 up to COLOR7. + * What the hell does that mean? */ + rs.vap_vtx_state_cntl = 0x5555; + /* The position is always present in VAP. */ rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_POS; rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; -- cgit v1.2.3 From 0dcf0f9dfaa23b08d2bc20f8cbd02550c2632e52 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 11 Aug 2010 10:46:12 +0200 Subject: auxiliary: move Ben Skeggs' primitive splitter to common code This is a simple framework that handles splitting primitives in an abstract way. The user has to specify the primitive start, start index and count. Then, it can ask the primitive splitter to "draw" a chunk of the primitive, staying under a given vertex/index budget. The primitive splitter will then call user-supplied functions to emit a range of vertices/indices, as well as switch the edgeflag on or off. This is particularly useful for hardware that either has limits on the vertex count field, or where vertices are pushed on a FIFO or temporary buffer of limited size. Note that unlike other splitters, it does not manipulate data in any way, and merely asks a callback to do so, in vertex intervals. --- src/gallium/auxiliary/util/u_split_prim.h | 102 +++++++++++++++++++++++++++++ src/gallium/drivers/nouveau/nouveau_util.h | 100 ---------------------------- src/gallium/drivers/nv50/nv50_push.c | 2 +- src/gallium/drivers/nv50/nv50_vbo.c | 2 +- 4 files changed, 104 insertions(+), 102 deletions(-) create mode 100644 src/gallium/auxiliary/util/u_split_prim.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_split_prim.h b/src/gallium/auxiliary/util/u_split_prim.h new file mode 100644 index 0000000000..3c438da0ba --- /dev/null +++ b/src/gallium/auxiliary/util/u_split_prim.h @@ -0,0 +1,102 @@ +/* Originally written by Ben Skeggs for the nv50 driver*/ +#include + +struct u_split_prim { + void *priv; + void (*emit)(void *priv, unsigned start, unsigned count); + void (*edge)(void *priv, boolean enabled); + + unsigned mode; + unsigned start; + unsigned p_start; + unsigned p_end; + + uint repeat_first:1; + uint close_first:1; + uint edgeflag_off:1; +}; + +static INLINE void +u_split_prim_init(struct u_split_prim *s, + unsigned mode, unsigned start, unsigned count) +{ + if (mode == PIPE_PRIM_LINE_LOOP) { + s->mode = PIPE_PRIM_LINE_STRIP; + s->close_first = 1; + } else { + s->mode = mode; + s->close_first = 0; + } + s->start = start; + s->p_start = start; + s->p_end = start + count; + s->edgeflag_off = 0; + s->repeat_first = 0; +} + +static INLINE boolean +u_split_prim_next(struct u_split_prim *s, unsigned max_verts) +{ + int repeat = 0; + + if (s->repeat_first) { + s->emit(s->priv, s->start, 1); + max_verts--; + if (s->edgeflag_off) { + s->edge(s->priv, TRUE); + s->edgeflag_off = FALSE; + } + } + + if (s->p_start + s->close_first + max_verts >= s->p_end) { + s->emit(s->priv, s->p_start, s->p_end - s->p_start); + if (s->close_first) + s->emit(s->priv, s->start, 1); + return TRUE; + } + + switch (s->mode) { + case PIPE_PRIM_LINES: + max_verts &= ~1; + break; + case PIPE_PRIM_LINE_STRIP: + repeat = 1; + break; + case PIPE_PRIM_POLYGON: + max_verts--; + s->emit(s->priv, s->p_start, max_verts); + s->edge(s->priv, FALSE); + s->emit(s->priv, s->p_start + max_verts, 1); + s->p_start += max_verts; + s->repeat_first = TRUE; + s->edgeflag_off = TRUE; + return FALSE; + case PIPE_PRIM_TRIANGLES: + max_verts = max_verts - (max_verts % 3); + break; + case PIPE_PRIM_TRIANGLE_STRIP: + /* to ensure winding stays correct, always split + * on an even number of generated triangles + */ + max_verts = max_verts & ~1; + repeat = 2; + break; + case PIPE_PRIM_TRIANGLE_FAN: + s->repeat_first = TRUE; + repeat = 1; + break; + case PIPE_PRIM_QUADS: + max_verts &= ~3; + break; + case PIPE_PRIM_QUAD_STRIP: + max_verts &= ~1; + repeat = 2; + break; + default: + break; + } + + s->emit (s->priv, s->p_start, max_verts); + s->p_start += (max_verts - repeat); + return FALSE; +} diff --git a/src/gallium/drivers/nouveau/nouveau_util.h b/src/gallium/drivers/nouveau/nouveau_util.h index a5e8537533..b165f7a611 100644 --- a/src/gallium/drivers/nouveau/nouveau_util.h +++ b/src/gallium/drivers/nouveau/nouveau_util.h @@ -88,104 +88,4 @@ static INLINE unsigned log2i(unsigned i) return r; } -struct u_split_prim { - void *priv; - void (*emit)(void *priv, unsigned start, unsigned count); - void (*edge)(void *priv, boolean enabled); - - unsigned mode; - unsigned start; - unsigned p_start; - unsigned p_end; - - uint repeat_first:1; - uint close_first:1; - uint edgeflag_off:1; -}; - -static INLINE void -u_split_prim_init(struct u_split_prim *s, - unsigned mode, unsigned start, unsigned count) -{ - if (mode == PIPE_PRIM_LINE_LOOP) { - s->mode = PIPE_PRIM_LINE_STRIP; - s->close_first = 1; - } else { - s->mode = mode; - s->close_first = 0; - } - s->start = start; - s->p_start = start; - s->p_end = start + count; - s->edgeflag_off = 0; - s->repeat_first = 0; -} - -static INLINE boolean -u_split_prim_next(struct u_split_prim *s, unsigned max_verts) -{ - int repeat = 0; - - if (s->repeat_first) { - s->emit(s->priv, s->start, 1); - max_verts--; - if (s->edgeflag_off) { - s->edge(s->priv, TRUE); - s->edgeflag_off = FALSE; - } - } - - if (s->p_start + s->close_first + max_verts >= s->p_end) { - s->emit(s->priv, s->p_start, s->p_end - s->p_start); - if (s->close_first) - s->emit(s->priv, s->start, 1); - return TRUE; - } - - switch (s->mode) { - case PIPE_PRIM_LINES: - max_verts &= ~1; - break; - case PIPE_PRIM_LINE_STRIP: - repeat = 1; - break; - case PIPE_PRIM_POLYGON: - max_verts--; - s->emit(s->priv, s->p_start, max_verts); - s->edge(s->priv, FALSE); - s->emit(s->priv, s->p_start + max_verts, 1); - s->p_start += max_verts; - s->repeat_first = TRUE; - s->edgeflag_off = TRUE; - return FALSE; - case PIPE_PRIM_TRIANGLES: - max_verts = max_verts - (max_verts % 3); - break; - case PIPE_PRIM_TRIANGLE_STRIP: - /* to ensure winding stays correct, always split - * on an even number of generated triangles - */ - max_verts = max_verts & ~1; - repeat = 2; - break; - case PIPE_PRIM_TRIANGLE_FAN: - s->repeat_first = TRUE; - repeat = 1; - break; - case PIPE_PRIM_QUADS: - max_verts &= ~3; - break; - case PIPE_PRIM_QUAD_STRIP: - max_verts &= ~1; - repeat = 2; - break; - default: - break; - } - - s->emit (s->priv, s->p_start, max_verts); - s->p_start += (max_verts - repeat); - return FALSE; -} - #endif diff --git a/src/gallium/drivers/nv50/nv50_push.c b/src/gallium/drivers/nv50/nv50_push.c index c3ac804146..ee87144dcf 100644 --- a/src/gallium/drivers/nv50/nv50_push.c +++ b/src/gallium/drivers/nv50/nv50_push.c @@ -2,8 +2,8 @@ #include "pipe/p_state.h" #include "util/u_inlines.h" #include "util/u_format.h" +#include "util/u_split_prim.h" -#include "nouveau/nouveau_util.h" #include "nv50_context.h" #include "nv50_resource.h" diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index e7f8fe33ed..0937668b84 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -24,8 +24,8 @@ #include "pipe/p_state.h" #include "util/u_inlines.h" #include "util/u_format.h" +#include "util/u_split_prim.h" -#include "nouveau/nouveau_util.h" #include "nv50_context.h" #include "nv50_resource.h" -- cgit v1.2.3 From eee5cea385b6871fa934a7882b2f214e3cbace8b Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 11 Aug 2010 10:51:28 +0200 Subject: auxiliary: fix u_split_prim naming convention Current practice is to start identifiers with "util_" instead of "u_". --- src/gallium/auxiliary/util/u_split_prim.h | 6 +++--- src/gallium/drivers/nv50/nv50_push.c | 6 +++--- src/gallium/drivers/nv50/nv50_vbo.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_split_prim.h b/src/gallium/auxiliary/util/u_split_prim.h index 3c438da0ba..e526a73fc3 100644 --- a/src/gallium/auxiliary/util/u_split_prim.h +++ b/src/gallium/auxiliary/util/u_split_prim.h @@ -1,7 +1,7 @@ /* Originally written by Ben Skeggs for the nv50 driver*/ #include -struct u_split_prim { +struct util_split_prim { void *priv; void (*emit)(void *priv, unsigned start, unsigned count); void (*edge)(void *priv, boolean enabled); @@ -17,7 +17,7 @@ struct u_split_prim { }; static INLINE void -u_split_prim_init(struct u_split_prim *s, +util_split_prim_init(struct util_split_prim *s, unsigned mode, unsigned start, unsigned count) { if (mode == PIPE_PRIM_LINE_LOOP) { @@ -35,7 +35,7 @@ u_split_prim_init(struct u_split_prim *s, } static INLINE boolean -u_split_prim_next(struct u_split_prim *s, unsigned max_verts) +util_split_prim_next(struct util_split_prim *s, unsigned max_verts) { int repeat = 0; diff --git a/src/gallium/drivers/nv50/nv50_push.c b/src/gallium/drivers/nv50/nv50_push.c index ee87144dcf..6a2ffd5a3c 100644 --- a/src/gallium/drivers/nv50/nv50_push.c +++ b/src/gallium/drivers/nv50/nv50_push.c @@ -217,7 +217,7 @@ nv50_push_elements_instanced(struct pipe_context *pipe, 4; /* potential edgeflag enable/disable */ const unsigned v_overhead = 1 + /* VERTEX_DATA packet header */ 2; /* potential edgeflag modification */ - struct u_split_prim s; + struct util_split_prim s; unsigned vtx_size; boolean nzi = FALSE; int i; @@ -335,7 +335,7 @@ nv50_push_elements_instanced(struct pipe_context *pipe, ctx.attr[i].map = (uint8_t *)ctx.attr[i].map + ctx.attr[i].stride; } - u_split_prim_init(&s, mode, start, count); + util_split_prim_init(&s, mode, start, count); do { if (AVAIL_RING(chan) < p_overhead + (6 * vtx_size)) { FIRE_RING(chan); @@ -351,7 +351,7 @@ nv50_push_elements_instanced(struct pipe_context *pipe, BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); OUT_RING (chan, nv50_prim(s.mode) | (nzi ? (1 << 28) : 0)); - done = u_split_prim_next(&s, max_verts); + done = util_split_prim_next(&s, max_verts); BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); } while (!done); diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 0937668b84..1f11950199 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -311,7 +311,7 @@ nv50_draw_elements_inline(struct pipe_context *pipe, struct pipe_transfer *transfer; struct instance a[16]; struct inline_ctx ctx; - struct u_split_prim s; + struct util_split_prim s; boolean nzi = FALSE; unsigned overhead; @@ -347,7 +347,7 @@ nv50_draw_elements_inline(struct pipe_context *pipe, unsigned max_verts; boolean done; - u_split_prim_init(&s, mode, start, count); + util_split_prim_init(&s, mode, start, count); do { if (AVAIL_RING(chan) < (overhead + 6)) { FIRE_RING(chan); @@ -366,7 +366,7 @@ nv50_draw_elements_inline(struct pipe_context *pipe, BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); OUT_RING (chan, nv50_prim(s.mode) | (nzi ? (1<<28) : 0)); - done = u_split_prim_next(&s, max_verts); + done = util_split_prim_next(&s, max_verts); BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); } while (!done); -- cgit v1.2.3 From 3deca2e284f0709e94bec5267febc7ccb34c17b8 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 11 Aug 2010 15:09:54 +0100 Subject: llvmpipe: Use single precision divide for one over area computation. --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 7e432503c1..44696c73b4 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -525,7 +525,7 @@ do_triangle_ccw(struct lp_setup_context *setup, info.dx20 = info.v2[0][0] - info.v0[0][0]; info.dy01 = info.v0[0][1] - info.v1[0][1]; info.dy20 = info.v2[0][1] - info.v0[0][1]; - info.oneoverarea = 1.0 / (info.dx01 * info.dy20 - info.dx20 * info.dy01); + info.oneoverarea = 1.0f / (info.dx01 * info.dy20 - info.dx20 * info.dy01); info.frontfacing = frontfacing; /* Setup parameter interpolants: -- cgit v1.2.3 From 66f57235d5e507b17f3cbf1329a803337ca08666 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 11 Aug 2010 15:10:28 +0100 Subject: llvmpipe: Debug code to dump interpolation coefficients. --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 30 ++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 44696c73b4..20e63ae51f 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -260,13 +260,13 @@ static void setup_tri_coefficients( struct lp_setup_context *setup, { unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; unsigned slot; + unsigned i; /* setup interpolation for all the remaining attributes: */ for (slot = 0; slot < setup->fs.nr_inputs; slot++) { unsigned vert_attr = setup->fs.input[slot].src_index; unsigned usage_mask = setup->fs.input[slot].usage_mask; - unsigned i; switch (setup->fs.input[slot].interp) { case LP_INTERP_CONSTANT: @@ -316,6 +316,34 @@ static void setup_tri_coefficients( struct lp_setup_context *setup, /* The internal position input is in slot zero: */ setup_fragcoord_coef(tri, info, 0, fragcoord_usage_mask); + + if (0) { + for (i = 0; i < NUM_CHANNELS; i++) { + float a0 = tri->inputs.a0 [0][i]; + float dadx = tri->inputs.dadx[0][i]; + float dady = tri->inputs.dady[0][i]; + + debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n", + "xyzw"[i], + a0, dadx, dady); + } + + for (slot = 0; slot < setup->fs.nr_inputs; slot++) { + unsigned usage_mask = setup->fs.input[slot].usage_mask; + for (i = 0; i < NUM_CHANNELS; i++) { + if (usage_mask & (1 << i)) { + float a0 = tri->inputs.a0 [1 + slot][i]; + float dadx = tri->inputs.dadx[1 + slot][i]; + float dady = tri->inputs.dady[1 + slot][i]; + + debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n", + slot, + "xyzw"[i], + a0, dadx, dady); + } + } + } + } } -- cgit v1.2.3 From 457378e031ffb89a2011604c7798a6f5f2142207 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 11 Aug 2010 12:19:33 -0400 Subject: r600g: add point/sprite rendering support Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_shader.c | 60 ++++++++++++++++++++----- src/gallium/drivers/r600/r600_state.c | 23 ++++++++-- src/gallium/drivers/r600/r600d.h | 80 ++++++++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 8a778f5fd6..ca65bff24c 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -105,8 +105,8 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_screen *rscreen = r600_screen(ctx->screen); int r; -//fprintf(stderr, "--------------------------------------------------------------\n"); -//tgsi_dump(tokens, 0); +fprintf(stderr, "--------------------------------------------------------------\n"); +tgsi_dump(tokens, 0); if (rpshader == NULL) return -ENOMEM; rpshader->shader.family = radeon_get_family(rscreen->rw); @@ -120,7 +120,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx, R600_ERR("building bytecode failed !\n"); return r; } -//fprintf(stderr, "______________________________________________________________\n"); +fprintf(stderr, "______________________________________________________________\n"); return 0; } @@ -155,11 +155,14 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader) { + const struct pipe_rasterizer_state *rasterizer; struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_shader *rshader = &rpshader->shader; + struct r600_context *rctx = r600_context(ctx); struct radeon_state *state; unsigned i, tmp, exports_ps, num_cout; + rasterizer = &rctx->rasterizer->state.rasterizer; rpshader->rstate = radeon_state_decref(rpshader->rstate); state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER); if (state == NULL) @@ -171,6 +174,9 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) { tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); } + if (rasterizer->sprite_coord_enable & (1 << i)) { + tmp |= S_028644_PT_SPRITE_TEX(1); + } state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp; } @@ -340,7 +346,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s struct tgsi_full_immediate *immediate; struct r600_shader_ctx ctx; struct r600_bc_output output[32]; - unsigned output_done; + unsigned output_done, noutput; unsigned opcode; int i, r = 0, pos0; @@ -418,7 +424,8 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s } } /* export output */ - for (i = 0, pos0 = 0; i < shader->noutput; i++) { + noutput = shader->noutput; + for (i = 0, pos0 = 0; i < noutput; i++) { memset(&output[i], 0, sizeof(struct r600_bc_output)); output[i].gpr = shader->output[i].gpr; output[i].elem_size = 3; @@ -430,13 +437,19 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; output[i].array_base = i - pos0; output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT; - switch (ctx.type == TGSI_PROCESSOR_VERTEX) { + switch (ctx.type) { case TGSI_PROCESSOR_VERTEX: if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { output[i].array_base = 60; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; /* position doesn't count in array_base */ - pos0 = 1; + pos0++; + } + if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { + output[i].array_base = 61; + output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + /* position doesn't count in array_base */ + pos0++; } break; case TGSI_PROCESSOR_FRAGMENT: @@ -457,17 +470,42 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s r = -EINVAL; goto out_err; } - if (i == (shader->noutput - 1)) { - output[i].end_of_program = 1; + } + /* add fake param output for vertex shader if no param is exported */ + if (ctx.type == TGSI_PROCESSOR_VERTEX) { + for (i = 0, pos0 = 0; i < noutput; i++) { + if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { + pos0 = 1; + break; + } + } + if (!pos0) { + memset(&output[i], 0, sizeof(struct r600_bc_output)); + output[i].gpr = 0; + output[i].elem_size = 3; + output[i].swizzle_x = 0; + output[i].swizzle_y = 1; + output[i].swizzle_z = 2; + output[i].swizzle_w = 3; + output[i].barrier = 1; + output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; + output[i].array_base = 0; + output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT; + noutput++; } } - for (i = shader->noutput - 1, output_done = 0; i >= 0; i--) { + /* set export done on last export of each type */ + for (i = noutput - 1, output_done = 0; i >= 0; i--) { + if (i == (noutput - 1)) { + output[i].end_of_program = 1; + } if (!(output_done & (1 << output[i].type))) { output_done |= (1 << output[i].type); output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE; } } - for (i = 0; i < shader->noutput; i++) { + /* add output to bytecode */ + for (i = 0; i < noutput; i++) { r = r600_bc_add_output(ctx.bc, &output[i]); if (r) goto out_err; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index cad5185e32..a50b75cc79 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -769,6 +769,7 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) float offset_units = 0, offset_scale = 0; char depth = 0; unsigned offset_db_fmt_cntl = 0; + unsigned tmp; if (fb->zsbuf) { offset_units = state->offset_units; @@ -800,6 +801,18 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) if (rstate == NULL) return NULL; rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000001; + if (state->sprite_coord_enable) { + rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] |= + S_0286D4_PNT_SPRITE_ENA(1) | + S_0286D4_PNT_SPRITE_OVRD_X(2) | + S_0286D4_PNT_SPRITE_OVRD_Y(3) | + S_0286D4_PNT_SPRITE_OVRD_Z(0) | + S_0286D4_PNT_SPRITE_OVRD_W(1); + if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { + rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] |= + S_0286D4_PNT_SPRITE_TOP_1(1); + } + } rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0x00000000; rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = 0x00080000 | S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | @@ -808,10 +821,14 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri); - rstate->states[R600_RASTERIZER__PA_CL_VS_OUT_CNTL] = 0x00000000; + rstate->states[R600_RASTERIZER__PA_CL_VS_OUT_CNTL] = + S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | + S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex); rstate->states[R600_RASTERIZER__PA_CL_NANINF_CNTL] = 0x00000000; - rstate->states[R600_RASTERIZER__PA_SU_POINT_SIZE] = 0x00080008; - rstate->states[R600_RASTERIZER__PA_SU_POINT_MINMAX] = 0x00000000; + /* point size 12.4 fixed point */ + tmp = (unsigned)(state->point_size * 8.0 / 2.0); + rstate->states[R600_RASTERIZER__PA_SU_POINT_SIZE] = S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp); + rstate->states[R600_RASTERIZER__PA_SU_POINT_MINMAX] = 0x80000000; rstate->states[R600_RASTERIZER__PA_SU_LINE_CNTL] = 0x00000008; rstate->states[R600_RASTERIZER__PA_SC_LINE_STIPPLE] = 0x00000005; rstate->states[R600_RASTERIZER__PA_SC_MPASS_PS_CNTL] = 0x00000000; diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index af93731550..53388f822e 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -654,6 +654,13 @@ #define S_028E0C_OFFSET(x) (((x) & 0xFFFFFFFF) << 0) #define G_028E0C_OFFSET(x) (((x) >> 0) & 0xFFFFFFFF) #define C_028E0C_OFFSET 0x00000000 +#define R_028A00_PA_SU_POINT_SIZE 0x028A00 +#define S_028A00_HEIGHT(x) (((x) & 0xFFFF) << 0) +#define G_028A00_HEIGHT(x) (((x) >> 0) & 0xFFFF) +#define C_028A00_HEIGHT 0xFFFF0000 +#define S_028A00_WIDTH(x) (((x) & 0xFFFF) << 16) +#define G_028A00_WIDTH(x) (((x) >> 16) & 0xFFFF) +#define C_028A00_WIDTH 0x0000FFFF #define R_028A40_VGT_GS_MODE 0x028A40 #define S_028A40_MODE(x) (((x) & 0x3) << 0) #define G_028A40_MODE(x) (((x) >> 0) & 0x3) @@ -1153,6 +1160,79 @@ #define V_008958_DI_PT_2D_FILL_RECT_LIST 0x0000001A #define V_008958_DI_PT_2D_LINE_STRIP 0x0000001B #define V_008958_DI_PT_2D_TRI_STRIP 0x0000001C +#define R_02881C_PA_CL_VS_OUT_CNTL 0x02881C +#define S_02881C_CLIP_DIST_ENA_0(x) (((x) & 0x1) << 0) +#define G_02881C_CLIP_DIST_ENA_0(x) (((x) >> 0) & 0x1) +#define C_02881C_CLIP_DIST_ENA_0 0xFFFFFFFE +#define S_02881C_CLIP_DIST_ENA_1(x) (((x) & 0x1) << 1) +#define G_02881C_CLIP_DIST_ENA_1(x) (((x) >> 1) & 0x1) +#define C_02881C_CLIP_DIST_ENA_1 0xFFFFFFFD +#define S_02881C_CLIP_DIST_ENA_2(x) (((x) & 0x1) << 2) +#define G_02881C_CLIP_DIST_ENA_2(x) (((x) >> 2) & 0x1) +#define C_02881C_CLIP_DIST_ENA_2 0xFFFFFFFB +#define S_02881C_CLIP_DIST_ENA_3(x) (((x) & 0x1) << 3) +#define G_02881C_CLIP_DIST_ENA_3(x) (((x) >> 3) & 0x1) +#define C_02881C_CLIP_DIST_ENA_3 0xFFFFFFF7 +#define S_02881C_CLIP_DIST_ENA_4(x) (((x) & 0x1) << 4) +#define G_02881C_CLIP_DIST_ENA_4(x) (((x) >> 4) & 0x1) +#define C_02881C_CLIP_DIST_ENA_4 0xFFFFFFEF +#define S_02881C_CLIP_DIST_ENA_5(x) (((x) & 0x1) << 5) +#define G_02881C_CLIP_DIST_ENA_5(x) (((x) >> 5) & 0x1) +#define C_02881C_CLIP_DIST_ENA_5 0xFFFFFFDF +#define S_02881C_CLIP_DIST_ENA_6(x) (((x) & 0x1) << 6) +#define G_02881C_CLIP_DIST_ENA_6(x) (((x) >> 6) & 0x1) +#define C_02881C_CLIP_DIST_ENA_6 0xFFFFFFBF +#define S_02881C_CLIP_DIST_ENA_7(x) (((x) & 0x1) << 7) +#define G_02881C_CLIP_DIST_ENA_7(x) (((x) >> 7) & 0x1) +#define C_02881C_CLIP_DIST_ENA_7 0xFFFFFF7F +#define S_02881C_CULL_DIST_ENA_0(x) (((x) & 0x1) << 8) +#define G_02881C_CULL_DIST_ENA_0(x) (((x) >> 8) & 0x1) +#define C_02881C_CULL_DIST_ENA_0 0xFFFFFEFF +#define S_02881C_CULL_DIST_ENA_1(x) (((x) & 0x1) << 9) +#define G_02881C_CULL_DIST_ENA_1(x) (((x) >> 9) & 0x1) +#define C_02881C_CULL_DIST_ENA_1 0xFFFFFDFF +#define S_02881C_CULL_DIST_ENA_2(x) (((x) & 0x1) << 10) +#define G_02881C_CULL_DIST_ENA_2(x) (((x) >> 10) & 0x1) +#define C_02881C_CULL_DIST_ENA_2 0xFFFFFBFF +#define S_02881C_CULL_DIST_ENA_3(x) (((x) & 0x1) << 11) +#define G_02881C_CULL_DIST_ENA_3(x) (((x) >> 11) & 0x1) +#define C_02881C_CULL_DIST_ENA_3 0xFFFFF7FF +#define S_02881C_CULL_DIST_ENA_4(x) (((x) & 0x1) << 12) +#define G_02881C_CULL_DIST_ENA_4(x) (((x) >> 12) & 0x1) +#define C_02881C_CULL_DIST_ENA_4 0xFFFFEFFF +#define S_02881C_CULL_DIST_ENA_5(x) (((x) & 0x1) << 13) +#define G_02881C_CULL_DIST_ENA_5(x) (((x) >> 13) & 0x1) +#define C_02881C_CULL_DIST_ENA_5 0xFFFFDFFF +#define S_02881C_CULL_DIST_ENA_6(x) (((x) & 0x1) << 14) +#define G_02881C_CULL_DIST_ENA_6(x) (((x) >> 14) & 0x1) +#define C_02881C_CULL_DIST_ENA_6 0xFFFFBFFF +#define S_02881C_CULL_DIST_ENA_7(x) (((x) & 0x1) << 15) +#define G_02881C_CULL_DIST_ENA_7(x) (((x) >> 15) & 0x1) +#define C_02881C_CULL_DIST_ENA_7 0xFFFF7FFF +#define S_02881C_USE_VTX_POINT_SIZE(x) (((x) & 0x1) << 16) +#define G_02881C_USE_VTX_POINT_SIZE(x) (((x) >> 16) & 0x1) +#define C_02881C_USE_VTX_POINT_SIZE 0xFFFEFFFF +#define S_02881C_USE_VTX_EDGE_FLAG(x) (((x) & 0x1) << 17) +#define G_02881C_USE_VTX_EDGE_FLAG(x) (((x) >> 17) & 0x1) +#define C_02881C_USE_VTX_EDGE_FLAG 0xFFFDFFFF +#define S_02881C_USE_VTX_RENDER_TARGET_INDX(x) (((x) & 0x1) << 18) +#define G_02881C_USE_VTX_RENDER_TARGET_INDX(x) (((x) >> 18) & 0x1) +#define C_02881C_USE_VTX_RENDER_TARGET_INDX 0xFFFBFFFF +#define S_02881C_USE_VTX_VIEWPORT_INDX(x) (((x) & 0x1) << 19) +#define G_02881C_USE_VTX_VIEWPORT_INDX(x) (((x) >> 19) & 0x1) +#define C_02881C_USE_VTX_VIEWPORT_INDX 0xFFF7FFFF +#define S_02881C_USE_VTX_KILL_FLAG(x) (((x) & 0x1) << 20) +#define G_02881C_USE_VTX_KILL_FLAG(x) (((x) >> 20) & 0x1) +#define C_02881C_USE_VTX_KILL_FLAG 0xFFEFFFFF +#define S_02881C_VS_OUT_MISC_VEC_ENA(x) (((x) & 0x1) << 21) +#define G_02881C_VS_OUT_MISC_VEC_ENA(x) (((x) >> 21) & 0x1) +#define C_02881C_VS_OUT_MISC_VEC_ENA 0xFFDFFFFF +#define S_02881C_VS_OUT_CCDIST0_VEC_ENA(x) (((x) & 0x1) << 22) +#define G_02881C_VS_OUT_CCDIST0_VEC_ENA(x) (((x) >> 22) & 0x1) +#define C_02881C_VS_OUT_CCDIST0_VEC_ENA 0xFFBFFFFF +#define S_02881C_VS_OUT_CCDIST1_VEC_ENA(x) (((x) & 0x1) << 23) +#define G_02881C_VS_OUT_CCDIST1_VEC_ENA(x) (((x) >> 23) & 0x1) +#define C_02881C_VS_OUT_CCDIST1_VEC_ENA 0xFF7FFFFF #define R_028868_SQ_PGM_RESOURCES_VS 0x028868 #define S_028868_NUM_GPRS(x) (((x) & 0xFF) << 0) #define G_028868_NUM_GPRS(x) (((x) >> 0) & 0xFF) -- cgit v1.2.3 From 1bb0427a856ffa3fea1b177ea5b0395a00de3833 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 11 Aug 2010 13:49:06 -0400 Subject: r600g: add src negation support Should fix few glBitmap cases. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_shader.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index ca65bff24c..cbeb69221c 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -537,6 +537,7 @@ static int tgsi_src(struct r600_shader_ctx *ctx, if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { r600_src->sel = 0; } + r600_src->neg = tgsi_src->Register.Negate; r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; return 0; } -- cgit v1.2.3 From 481b65abaedb271d0da24c75b8c60f7bcf6d8ce9 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 11 Aug 2010 14:26:07 -0400 Subject: r600g: accept empty frag prog shader Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_asm.c | 3 +-- src/gallium/drivers/r600/r600_shader.c | 15 +++++++++++++++ src/gallium/drivers/r600/r600_state.c | 2 +- 3 files changed, 17 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index ae818bf19b..9ea9d4354d 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -167,8 +167,7 @@ int r600_bc_add_literal(struct r600_bc *bc, const u32 *value) struct r600_bc_alu *alu; if (bc->cf_last == NULL) { - R600_ERR("no last CF\n"); - return -EINVAL; + return 0; } if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) { return 0; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index cbeb69221c..956c7e7930 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -494,6 +494,21 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s noutput++; } } + /* add fake pixel export */ + if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { + memset(&output[0], 0, sizeof(struct r600_bc_output)); + output[0].gpr = 0; + output[0].elem_size = 3; + output[0].swizzle_x = 7; + output[0].swizzle_y = 7; + output[0].swizzle_z = 7; + output[0].swizzle_w = 7; + output[0].barrier = 1; + output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; + output[0].array_base = 0; + output[0].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT; + noutput++; + } /* set export done on last export of each type */ for (i = noutput - 1, output_done = 0; i >= 0; i--) { if (i == (noutput - 1)) { diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index a50b75cc79..ed2d9f9984 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -727,7 +727,7 @@ static struct radeon_state *r600_db(struct r600_context *rctx) struct r600_resource *rbuffer; struct radeon_state *rstate; const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer; - unsigned level = state->cbufs[0]->level; + unsigned level; unsigned pitch, slice, format; if (state->zsbuf == NULL) -- cgit v1.2.3 From f78445de5d2316934ebeaa19a616d2f960c89237 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 11 Aug 2010 09:25:45 -0700 Subject: r300/compiler: Implement the CONT opcode. --- src/gallium/drivers/r300/r300_tgsi_to_rc.c | 2 +- .../drivers/dri/r300/compiler/r500_fragprog_emit.c | 19 ++++++++++++++++--- .../dri/r300/compiler/radeon_dataflow_deadcode.c | 2 +- src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c | 4 ++-- src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h | 2 +- 5 files changed, 21 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 51b2c55550..dd697b9c37 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -126,7 +126,7 @@ static unsigned translate_opcode(unsigned opcode) /* case TGSI_OPCODE_SAD: return RC_OPCODE_SAD; */ /* case TGSI_OPCODE_TXF: return RC_OPCODE_TXF; */ /* case TGSI_OPCODE_TXQ: return RC_OPCODE_TXQ; */ - /* case TGSI_OPCODE_CONT: return RC_OPCODE_CONT; */ + case TGSI_OPCODE_CONT: return RC_OPCODE_CONT; /* case TGSI_OPCODE_EMIT: return RC_OPCODE_EMIT; */ /* case TGSI_OPCODE_ENDPRIM: return RC_OPCODE_ENDPRIM; */ /* case TGSI_OPCODE_BGNLOOP2: return RC_OPCODE_BGNLOOP2; */ diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index c3f817ad4e..dfad12eb15 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -70,6 +70,10 @@ struct loop_info { int * Brks; int BrkCount; int BrkReserved; + + int * Conts; + int ContCount; + int ContReserved; }; struct emit_state { @@ -413,15 +417,18 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst ; break; - case RC_OPCODE_CONTINUE: + case RC_OPCODE_CONT: loop = &s->Loops[s->CurrentLoopDepth - 1]; - s->Code->inst[newip].inst2 = R500_FC_OP_JUMP + memory_pool_array_reserve(&s->C->Pool, int, loop->Conts, + loop->ContCount, loop->ContReserved, 1); + loop->Conts[loop->ContCount++] = newip; + s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE | R500_FC_JUMP_FUNC(0xff) | R500_FC_B_OP1_DECR | R500_FC_B_POP_CNT( s->CurrentBranchDepth - loop->BranchDepth) + | R500_FC_IGNORE_UNCOVERED ; - s->Code->inst[newip].inst3 = R500_FC_JUMP_ADDR(loop->BgnLoop); break; case RC_OPCODE_ENDLOOP: @@ -449,6 +456,12 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst s->Code->inst[loop->Brks[loop->BrkCount]].inst3 = R500_FC_JUMP_ADDR(newip + 1); } + + /* Set jump address for CONT instructions. */ + while(loop->ContCount--) { + s->Code->inst[loop->Conts[loop->ContCount]].inst3 = + R500_FC_JUMP_ADDR(newip); + } s->CurrentLoopDepth--; break; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c index 31566a937f..faf531b412 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c @@ -274,7 +274,7 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f } break; } - case RC_OPCODE_CONTINUE: + case RC_OPCODE_CONT: break; case RC_OPCODE_ENDIF: push_branch(&s); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c index 04f234f11d..2ea830be7f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -386,8 +386,8 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .NumSrcRegs = 0, }, { - .Opcode = RC_OPCODE_CONTINUE, - .Name = "CONTINUE", + .Opcode = RC_OPCODE_CONT, + .Name = "CONT", .IsFlowControl = 1, .NumSrcRegs = 0 }, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h index 8b9fa07dde..6e18d6eb3f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h @@ -187,7 +187,7 @@ typedef enum { RC_OPCODE_ENDLOOP, - RC_OPCODE_CONTINUE, + RC_OPCODE_CONT, /** special instruction, used in R300-R500 fragment program pair instructions * indicates that the result of the alpha operation shall be replicated -- cgit v1.2.3 From de4784e36505316c2a5ab34cc5b371d17f38d3c5 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 12 Aug 2010 05:06:21 +0200 Subject: u_blitter: unify clear_depth_stencil and flush_depth_stencil No need to enable depth test for clear. --- src/gallium/auxiliary/util/u_blitter.c | 46 ---------------------------------- src/gallium/auxiliary/util/u_blitter.h | 2 -- src/gallium/drivers/r300/r300_blit.c | 4 ++- 3 files changed, 3 insertions(+), 49 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index b5b86b7214..1b9e957e3c 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -87,7 +87,6 @@ struct blitter_context_priv void *dsa_write_depth_keep_stencil; void *dsa_keep_depth_stencil; void *dsa_keep_depth_write_stencil; - void *dsa_flush_depth_stencil; void *velem_state; @@ -158,12 +157,6 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) pipe->create_depth_stencil_alpha_state(pipe, &dsa); dsa.depth.writemask = 1; - ctx->dsa_flush_depth_stencil = - pipe->create_depth_stencil_alpha_state(pipe, &dsa); - - dsa.depth.enabled = 1; - dsa.depth.writemask = 1; - dsa.depth.func = PIPE_FUNC_ALWAYS; ctx->dsa_write_depth_keep_stencil = pipe->create_depth_stencil_alpha_state(pipe, &dsa); @@ -945,42 +938,3 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, UTIL_BLITTER_ATTRIB_NONE, NULL); blitter_restore_CSOs(ctx); } - -/* Clear a region of a depth stencil surface. */ -void util_blitter_flush_depth_stencil(struct blitter_context *blitter, - struct pipe_surface *dstsurf) -{ - struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; - struct pipe_context *pipe = ctx->base.pipe; - struct pipe_framebuffer_state fb_state; - - assert(dstsurf->texture); - if (!dstsurf->texture) - return; - - /* check the saved state */ - blitter_check_saved_CSOs(ctx); - assert(blitter->saved_fb_state.nr_cbufs != ~0); - - /* bind CSOs */ - pipe->bind_blend_state(pipe, ctx->blend_keep_color); - pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil); - - pipe->bind_rasterizer_state(pipe, ctx->rs_state); - pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0)); - pipe->bind_vs_state(pipe, ctx->vs_col); - pipe->bind_vertex_elements_state(pipe, ctx->velem_state); - - /* set a framebuffer state */ - fb_state.width = dstsurf->width; - fb_state.height = dstsurf->height; - fb_state.nr_cbufs = 0; - fb_state.cbufs[0] = 0; - fb_state.zsbuf = dstsurf; - pipe->set_framebuffer_state(pipe, &fb_state); - - blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height); - blitter->draw_rectangle(blitter, 0, 0, dstsurf->width, dstsurf->height, 0, - UTIL_BLITTER_ATTRIB_NONE, NULL); - blitter_restore_CSOs(ctx); -} diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index f316587dea..ba3f92eca8 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -200,8 +200,6 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, unsigned dstx, unsigned dsty, unsigned width, unsigned height); -void util_blitter_flush_depth_stencil(struct blitter_context *blitter, - struct pipe_surface *dstsurf); /* The functions below should be used to save currently bound constant state * objects inside a driver. The objects are automatically restored at the end * of the util_blitter_{clear, copy_region, fill_region} functions and then diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index ff52286b5c..67e8288440 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -279,7 +279,9 @@ void r300_flush_depth_stencil(struct pipe_context *pipe, PIPE_BIND_DEPTH_STENCIL); r300->z_decomp_rd = TRUE; r300_blitter_begin(r300, R300_CLEAR_SURFACE); - util_blitter_flush_depth_stencil(r300->blitter, dstsurf); + util_blitter_clear_depth_stencil(r300->blitter, dstsurf, + PIPE_CLEAR_DEPTH, 0, 0, + 0, 0, dstsurf->width, dstsurf->height); r300_blitter_end(r300); r300->z_decomp_rd = FALSE; -- cgit v1.2.3 From e2df0a8b234efde140b340c2c9b67b06b789b758 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 12 Aug 2010 16:06:40 +1000 Subject: r600g: improve texture format checker. This takes the r300g texture format checker and fixes it up for r600g, it passes glean texSwizzle, pixelformats, and texture_srgb tests, however I think it L8S8_SRGB is broken as is L8_SRGB, need to investigate. Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_context.h | 3 + src/gallium/drivers/r600/r600_state.c | 22 +-- src/gallium/drivers/r600/r600_state_inlines.h | 2 +- src/gallium/drivers/r600/r600_texture.c | 248 ++++++++++++++++++++++++++ 4 files changed, 263 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index c606dbbda3..76d5de8653 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -175,4 +175,7 @@ extern int r600_pipe_shader_update(struct pipe_context *ctx, #define R600_ERR(fmt, args...) \ fprintf(stderr, "EE %s/%s:%d - "fmt, __FILE__, __func__, __LINE__, ##args) +uint32_t r600_translate_texformat(enum pipe_format format, + const unsigned char *swizzle_view, + uint32_t *word4_p, uint32_t *yuv_format_p); #endif diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index ed2d9f9984..46e8f2ae1f 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1169,8 +1169,16 @@ static struct radeon_state *r600_resource(struct r600_context *rctx, struct r600_resource *rbuffer; struct radeon_state *rstate; unsigned format; - - format = r600_translate_colorformat(view->texture->format); + uint32_t word4 = 0, yuv_format = 0; + unsigned char swizzle[4]; + + swizzle[0] = view->swizzle_r; + swizzle[1] = view->swizzle_g; + swizzle[2] = view->swizzle_b; + swizzle[3] = view->swizzle_a; + format = r600_translate_texformat(view->texture->format, + swizzle, + &word4, &yuv_format); if (format == ~0) return NULL; desc = util_format_description(view->texture->format); @@ -1204,18 +1212,10 @@ static struct radeon_state *r600_resource(struct r600_context *rctx, rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = 0; rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = tmp->offset[1] >> 8; rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD4] = - S_038010_FORMAT_COMP_X(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) | - S_038010_FORMAT_COMP_Y(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) | - S_038010_FORMAT_COMP_Z(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) | - S_038010_FORMAT_COMP_W(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) | + word4 | S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) | S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) | S_038010_REQUEST_SIZE(1) | - S_038010_DST_SEL_X(r600_tex_swizzle(view->swizzle_b)) | - S_038010_DST_SEL_Y(r600_tex_swizzle(view->swizzle_g)) | - S_038010_DST_SEL_Z(r600_tex_swizzle(view->swizzle_r)) | - S_038010_DST_SEL_W(r600_tex_swizzle(view->swizzle_a)) | - S_038010_FORCE_DEGAMMA(desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ? 1 : 0) | S_038010_BASE_LEVEL(view->first_level); rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD5] = S_038014_LAST_LEVEL(view->last_level) | diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 8271ad19fb..060a27cd6f 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -289,7 +289,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) static INLINE boolean r600_is_sampler_format_supported(enum pipe_format format) { - return r600_translate_colorformat(format) != ~0; + return r600_translate_texformat(format, NULL, NULL, NULL) != ~0; } static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format) diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 1bce911306..30d79ebdd6 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -33,6 +33,7 @@ #include "r600_screen.h" #include "r600_context.h" #include "r600_resource.h" +#include "r600d.h" extern struct u_resource_vtbl r600_texture_vtbl; @@ -277,3 +278,250 @@ void r600_init_screen_texture_functions(struct pipe_screen *screen) screen->get_tex_surface = r600_get_tex_surface; screen->tex_surface_destroy = r600_tex_surface_destroy; } + +static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format, + const unsigned char *swizzle_view) +{ + unsigned i; + unsigned char swizzle[4]; + unsigned result = 0; + const uint32_t swizzle_shift[4] = { + 16, 19, 22, 25, + }; + const uint32_t swizzle_bit[4] = { + 0, 1, 2, 3, + }; + + if (swizzle_view) { + /* Combine two sets of swizzles. */ + for (i = 0; i < 4; i++) { + swizzle[i] = swizzle_view[i] <= UTIL_FORMAT_SWIZZLE_W ? + swizzle_format[swizzle_view[i]] : swizzle_view[i]; + } + } else { + memcpy(swizzle, swizzle_format, 4); + } + + /* Get swizzle. */ + for (i = 0; i < 4; i++) { + switch (swizzle[i]) { + case UTIL_FORMAT_SWIZZLE_Y: + result |= swizzle_bit[1] << swizzle_shift[i]; + break; + case UTIL_FORMAT_SWIZZLE_Z: + result |= swizzle_bit[2] << swizzle_shift[i]; + break; + case UTIL_FORMAT_SWIZZLE_W: + result |= swizzle_bit[3] << swizzle_shift[i]; + break; + case UTIL_FORMAT_SWIZZLE_0: + result |= V_038010_SQ_SEL_0 << swizzle_shift[i]; + break; + case UTIL_FORMAT_SWIZZLE_1: + result |= V_038010_SQ_SEL_1 << swizzle_shift[i]; + break; + default: /* UTIL_FORMAT_SWIZZLE_X */ + result |= swizzle_bit[0] << swizzle_shift[i]; + } + } + return result; +} + +/* texture format translate */ +uint32_t r600_translate_texformat(enum pipe_format format, + const unsigned char *swizzle_view, + uint32_t *word4_p, uint32_t *yuv_format_p) +{ + uint32_t result = 0, word4 = 0, yuv_format = 0; + const struct util_format_description *desc; + boolean uniform = TRUE; + int i; + const uint32_t sign_bit[4] = { + S_038010_FORMAT_COMP_X(V_038010_SQ_FORMAT_COMP_SIGNED), + S_038010_FORMAT_COMP_Y(V_038010_SQ_FORMAT_COMP_SIGNED), + S_038010_FORMAT_COMP_Z(V_038010_SQ_FORMAT_COMP_SIGNED), + S_038010_FORMAT_COMP_W(V_038010_SQ_FORMAT_COMP_SIGNED) + }; + desc = util_format_description(format); + + /* Colorspace (return non-RGB formats directly). */ + switch (desc->colorspace) { + /* Depth stencil formats */ + case UTIL_FORMAT_COLORSPACE_ZS: + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + result = V_028010_DEPTH_16; + goto out_word4; + case PIPE_FORMAT_Z24X8_UNORM: + result = V_028010_DEPTH_X8_24; + goto out_word4; + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + result = V_028010_DEPTH_8_24; + goto out_word4; + default: + goto out_unknown; + } + + case UTIL_FORMAT_COLORSPACE_YUV: + yuv_format |= (1 << 30); + switch (format) { + case PIPE_FORMAT_UYVY: + case PIPE_FORMAT_YUYV: + default: + break; + } + goto out_unknown; /* TODO */ + + case UTIL_FORMAT_COLORSPACE_SRGB: + word4 |= S_038010_FORCE_DEGAMMA(1); + if (format == PIPE_FORMAT_L8A8_SRGB || format == PIPE_FORMAT_L8_SRGB) + goto out_unknown; /* fails for some reason - TODO */ + break; + + default: + break; + } + + word4 |= r600_get_swizzle_combined(desc->swizzle, swizzle_view); + + /* S3TC formats. TODO */ + if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { + goto out_unknown; + } + + + for (i = 0; i < desc->nr_channels; i++) { + if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { + word4 |= sign_bit[i]; + } + } + + /* R8G8Bx_SNORM - TODO CxV8U8 */ + + /* RGTC - TODO */ + + /* See whether the components are of the same size. */ + for (i = 1; i < desc->nr_channels; i++) { + uniform = uniform && desc->channel[0].size == desc->channel[i].size; + } + + /* Non-uniform formats. */ + if (!uniform) { + switch(desc->nr_channels) { + case 3: + if (desc->channel[0].size == 5 && + desc->channel[1].size == 6 && + desc->channel[2].size == 5) { + result |= V_0280A0_COLOR_5_6_5; + goto out_word4; + } + goto out_unknown; + case 4: + if (desc->channel[0].size == 5 && + desc->channel[1].size == 5 && + desc->channel[2].size == 5 && + desc->channel[3].size == 1) { + result |= V_0280A0_COLOR_1_5_5_5; + goto out_word4; + } + if (desc->channel[0].size == 10 && + desc->channel[1].size == 10 && + desc->channel[2].size == 10 && + desc->channel[3].size == 2) { + result |= V_0280A0_COLOR_10_10_10_2; + goto out_word4; + } + goto out_unknown; + } + goto out_unknown; + } + + /* uniform formats */ + switch (desc->channel[0].type) { + case UTIL_FORMAT_TYPE_UNSIGNED: + case UTIL_FORMAT_TYPE_SIGNED: + if (!desc->channel[0].normalized && + desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) { + goto out_unknown; + } + + switch (desc->channel[0].size) { + case 4: + switch (desc->nr_channels) { + case 2: + result |= V_0280A0_COLOR_4_4; + goto out_word4; + case 4: + result |= V_0280A0_COLOR_4_4_4_4; + goto out_word4; + } + goto out_unknown; + case 8: + switch (desc->nr_channels) { + case 1: + result |= V_0280A0_COLOR_8; + goto out_word4; + case 2: + result |= V_0280A0_COLOR_8_8; + goto out_word4; + case 4: + result |= V_0280A0_COLOR_8_8_8_8; + goto out_word4; + } + goto out_unknown; + case 16: + switch (desc->nr_channels) { + case 1: + result |= V_0280A0_COLOR_16; + goto out_word4; + case 2: + result |= V_0280A0_COLOR_16_16; + goto out_word4; + case 4: + result |= V_0280A0_COLOR_16_16_16_16; + goto out_word4; + } + } + goto out_unknown; + + case UTIL_FORMAT_TYPE_FLOAT: + switch (desc->channel[0].size) { + case 16: + switch (desc->nr_channels) { + case 1: + result |= V_0280A0_COLOR_16_FLOAT; + goto out_word4; + case 2: + result |= V_0280A0_COLOR_16_16_FLOAT; + goto out_word4; + case 4: + result |= V_0280A0_COLOR_16_16_16_16_FLOAT; + goto out_word4; + } + goto out_unknown; + case 32: + switch (desc->nr_channels) { + case 1: + result |= V_0280A0_COLOR_32_FLOAT; + goto out_word4; + case 2: + result |= V_0280A0_COLOR_32_32_FLOAT; + goto out_word4; + case 4: + result |= V_0280A0_COLOR_32_32_32_32_FLOAT; + goto out_word4; + } + } + + } +out_word4: + if (word4_p) + *word4_p = word4; + if (yuv_format_p) + *yuv_format_p = yuv_format; +// fprintf(stderr,"returning %08x %08x %08x\n", result, word4, yuv_format); + return result; +out_unknown: +// R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); + return ~0; +} -- cgit v1.2.3 From 13bc2098ca21be3d11176e558ca71e29e41a239f Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 12 Aug 2010 16:29:04 +1000 Subject: r600g: fix provoking-vertex piglit test. --- src/gallium/drivers/r600/r600_state.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 46e8f2ae1f..b9b46d64e3 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -770,7 +770,7 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) char depth = 0; unsigned offset_db_fmt_cntl = 0; unsigned tmp; - + unsigned prov_vtx = 1; if (fb->zsbuf) { offset_units = state->offset_units; offset_scale = state->offset_scale * 12.0f; @@ -796,6 +796,9 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) } offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth); + if (state->flatshade_first) + prov_vtx = 0; + rctx->flat_shade = state->flatshade; rstate = radeon_state(rscreen->rw, R600_RASTERIZER_TYPE, R600_RASTERIZER); if (rstate == NULL) @@ -814,13 +817,14 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) } } rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0x00000000; - rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = 0x00080000 | - S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | - S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | - S_028814_FACE(!state->front_ccw) | - S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | - S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | - S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri); + rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = + S_028814_PROVOKING_VTX_LAST(prov_vtx) | + S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | + S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | + S_028814_FACE(!state->front_ccw) | + S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | + S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | + S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri); rstate->states[R600_RASTERIZER__PA_CL_VS_OUT_CNTL] = S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex); -- cgit v1.2.3 From 582129ced65e0a71478a8e75e00397037c4826d8 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 12 Aug 2010 16:32:19 +1000 Subject: r600g: fix typo in stencil translate. fixes piglit stencil-twoside and stencil-wrap --- src/gallium/drivers/r600/r600_state_inlines.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 060a27cd6f..f93c20da35 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -110,7 +110,7 @@ static INLINE uint32_t r600_translate_stencil_op(int s_op) case PIPE_STENCIL_OP_DECR: return V_028800_STENCIL_DECR; case PIPE_STENCIL_OP_INCR_WRAP: - return V_028800_STENCIL_INVERT; + return V_028800_STENCIL_INCR_WRAP; case PIPE_STENCIL_OP_DECR_WRAP: return V_028800_STENCIL_DECR_WRAP; case PIPE_STENCIL_OP_INVERT: -- cgit v1.2.3 From f668ea11bd0b1f662e0be523a4bc46835e011ffa Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 12 Aug 2010 13:34:53 +0200 Subject: Revert "u_blitter: unify clear_depth_stencil and flush_depth_stencil" This reverts commit de4784e36505316c2a5ab34cc5b371d17f38d3c5. --- src/gallium/auxiliary/util/u_blitter.c | 46 ++++++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_blitter.h | 2 ++ src/gallium/drivers/r300/r300_blit.c | 4 +-- 3 files changed, 49 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 1b9e957e3c..b5b86b7214 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -87,6 +87,7 @@ struct blitter_context_priv void *dsa_write_depth_keep_stencil; void *dsa_keep_depth_stencil; void *dsa_keep_depth_write_stencil; + void *dsa_flush_depth_stencil; void *velem_state; @@ -157,6 +158,12 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) pipe->create_depth_stencil_alpha_state(pipe, &dsa); dsa.depth.writemask = 1; + ctx->dsa_flush_depth_stencil = + pipe->create_depth_stencil_alpha_state(pipe, &dsa); + + dsa.depth.enabled = 1; + dsa.depth.writemask = 1; + dsa.depth.func = PIPE_FUNC_ALWAYS; ctx->dsa_write_depth_keep_stencil = pipe->create_depth_stencil_alpha_state(pipe, &dsa); @@ -938,3 +945,42 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, UTIL_BLITTER_ATTRIB_NONE, NULL); blitter_restore_CSOs(ctx); } + +/* Clear a region of a depth stencil surface. */ +void util_blitter_flush_depth_stencil(struct blitter_context *blitter, + struct pipe_surface *dstsurf) +{ + struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; + struct pipe_context *pipe = ctx->base.pipe; + struct pipe_framebuffer_state fb_state; + + assert(dstsurf->texture); + if (!dstsurf->texture) + return; + + /* check the saved state */ + blitter_check_saved_CSOs(ctx); + assert(blitter->saved_fb_state.nr_cbufs != ~0); + + /* bind CSOs */ + pipe->bind_blend_state(pipe, ctx->blend_keep_color); + pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil); + + pipe->bind_rasterizer_state(pipe, ctx->rs_state); + pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0)); + pipe->bind_vs_state(pipe, ctx->vs_col); + pipe->bind_vertex_elements_state(pipe, ctx->velem_state); + + /* set a framebuffer state */ + fb_state.width = dstsurf->width; + fb_state.height = dstsurf->height; + fb_state.nr_cbufs = 0; + fb_state.cbufs[0] = 0; + fb_state.zsbuf = dstsurf; + pipe->set_framebuffer_state(pipe, &fb_state); + + blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height); + blitter->draw_rectangle(blitter, 0, 0, dstsurf->width, dstsurf->height, 0, + UTIL_BLITTER_ATTRIB_NONE, NULL); + blitter_restore_CSOs(ctx); +} diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index ba3f92eca8..f316587dea 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -200,6 +200,8 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, unsigned dstx, unsigned dsty, unsigned width, unsigned height); +void util_blitter_flush_depth_stencil(struct blitter_context *blitter, + struct pipe_surface *dstsurf); /* The functions below should be used to save currently bound constant state * objects inside a driver. The objects are automatically restored at the end * of the util_blitter_{clear, copy_region, fill_region} functions and then diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 67e8288440..ff52286b5c 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -279,9 +279,7 @@ void r300_flush_depth_stencil(struct pipe_context *pipe, PIPE_BIND_DEPTH_STENCIL); r300->z_decomp_rd = TRUE; r300_blitter_begin(r300, R300_CLEAR_SURFACE); - util_blitter_clear_depth_stencil(r300->blitter, dstsurf, - PIPE_CLEAR_DEPTH, 0, 0, - 0, 0, dstsurf->width, dstsurf->height); + util_blitter_flush_depth_stencil(r300->blitter, dstsurf); r300_blitter_end(r300); r300->z_decomp_rd = FALSE; -- cgit v1.2.3 From f2804e70623c4c71e5758a24d695f8d6b74bf6d7 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 13 Aug 2010 10:16:29 +1000 Subject: r600g: fix memory leaks running gears. I noticed gears memory usage was heading skywards, some r600 "states" aren't properly refcounted, and the ctx->state is never freed. --- src/gallium/drivers/r600/r600_state.c | 5 +++++ src/gallium/winsys/r600/drm/radeon_ctx.c | 1 + 2 files changed, 6 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index b9b46d64e3..3efd409ae0 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -379,6 +379,8 @@ static void r600_set_scissor_state(struct pipe_context *ctx, rstate = r600_context_state(rctx, pipe_scissor_type, state); r600_bind_state(ctx, rstate); + /* refcount is taken care of this */ + r600_delete_state(ctx, rstate); } static void r600_set_stencil_ref(struct pipe_context *ctx, @@ -389,6 +391,8 @@ static void r600_set_stencil_ref(struct pipe_context *ctx, rstate = r600_context_state(rctx, pipe_stencil_ref_type, state); r600_bind_state(ctx, rstate); + /* refcount is taken care of this */ + r600_delete_state(ctx, rstate); } static void r600_set_vertex_buffers(struct pipe_context *ctx, @@ -433,6 +437,7 @@ static void r600_set_viewport_state(struct pipe_context *ctx, rstate = r600_context_state(rctx, pipe_viewport_type, state); r600_bind_state(ctx, rstate); + r600_delete_state(ctx, rstate); } void r600_init_state_functions(struct r600_context *rctx) diff --git a/src/gallium/winsys/r600/drm/radeon_ctx.c b/src/gallium/winsys/r600/drm/radeon_ctx.c index ff70ce6de7..45b706bb0f 100644 --- a/src/gallium/winsys/r600/drm/radeon_ctx.c +++ b/src/gallium/winsys/r600/drm/radeon_ctx.c @@ -112,6 +112,7 @@ struct radeon_ctx *radeon_ctx_decref(struct radeon_ctx *ctx) ctx->bo[i] = radeon_bo_decref(ctx->radeon, ctx->bo[i]); } ctx->radeon = radeon_decref(ctx->radeon); + free(ctx->state); free(ctx->draw); free(ctx->bo); free(ctx->pm4); -- cgit v1.2.3 From 16e782b83f298fcf82bbb277690ac0c713e90e21 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 13 Aug 2010 02:58:51 +0200 Subject: r300g: disable depth clamp for now It breaks Regnum Online in that it renders random triangles all over the screen. https://bugs.freedesktop.org/show_bug.cgi?id=29518 --- src/gallium/drivers/r300/r300_screen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 6268001054..d1b8e9eed4 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -115,7 +115,6 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_MIRROR_REPEAT: case PIPE_CAP_BLEND_EQUATION_SEPARATE: case PIPE_CAP_TEXTURE_SWIZZLE: - case PIPE_CAP_DEPTH_CLAMP: return 1; /* Unsupported features (boolean caps). */ @@ -124,6 +123,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_CONT_SUPPORTED: case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_INDEP_BLEND_FUNC: + case PIPE_CAP_DEPTH_CLAMP: /* XXX implemented, but breaks Regnum Online */ return 0; /* Texturing. */ -- cgit v1.2.3 From 4985ce1755c5d6f4fa6281150ae0bf7df594146f Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 13 Aug 2010 03:27:45 +0200 Subject: r600g: update shader caps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sent on ML by Владимир. These values are what fglrx returns. --- src/gallium/drivers/r600/r600_screen.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_screen.c b/src/gallium/drivers/r600/r600_screen.c index 6ec842d591..cdaca9ed7d 100644 --- a/src/gallium/drivers/r600/r600_screen.c +++ b/src/gallium/drivers/r600/r600_screen.c @@ -107,29 +107,37 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) /* Shader limits. */ case PIPE_CAP_MAX_VS_INSTRUCTIONS: + return 16384; //max native instructions, not greater than max instructions case PIPE_CAP_MAX_VS_ALU_INSTRUCTIONS: case PIPE_CAP_MAX_VS_TEX_INSTRUCTIONS: case PIPE_CAP_MAX_VS_TEX_INDIRECTIONS: + return 16384; case PIPE_CAP_MAX_FS_INSTRUCTIONS: + return 16384; //max program native instructions case PIPE_CAP_MAX_FS_ALU_INSTRUCTIONS: + return 16384; //max program native ALU instructions case PIPE_CAP_MAX_FS_TEX_INSTRUCTIONS: + return 16384; //max program native texture instructions case PIPE_CAP_MAX_FS_TEX_INDIRECTIONS: - return 8192; + return 2048; //max program native texture indirections case PIPE_CAP_MAX_VS_CONTROL_FLOW_DEPTH: case PIPE_CAP_MAX_FS_CONTROL_FLOW_DEPTH: return 8; /* FIXME */ case PIPE_CAP_MAX_VS_INPUTS: + return 16; //max native attributes case PIPE_CAP_MAX_FS_INPUTS: - return 32; + return 10; //max native attributes case PIPE_CAP_MAX_VS_TEMPS: + return 256; //max native temporaries case PIPE_CAP_MAX_FS_TEMPS: - return 128; + return 256; //max native temporaries case PIPE_CAP_MAX_VS_ADDRS: case PIPE_CAP_MAX_FS_ADDRS: - return 1; /* FIXME Isn't this equal to TEMPS? */ + return 1; //max native address registers/* FIXME Isn't this equal to TEMPS? */ case PIPE_CAP_MAX_VS_CONSTS: + return 256; //max native parameters case PIPE_CAP_MAX_FS_CONSTS: - return 256; /* FIXME I believe this should be much higher. */ + return 256; //max program native parameters case PIPE_CAP_MAX_CONST_BUFFERS: return 1; case PIPE_CAP_MAX_CONST_BUFFER_SIZE: /* in bytes */ -- cgit v1.2.3 From aa6bdd38af344f7f2ea9956d2ed415f26e8f07cb Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 13 Aug 2010 04:34:38 +0200 Subject: r300g: fix fastfill when color and Z clear are invoked separately This always restores the previous depth clear value after CBZB clear. --- src/gallium/drivers/r300/r300_blit.c | 2 +- src/gallium/drivers/r300/r300_state.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index ff52286b5c..5fe9b9017d 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -176,7 +176,7 @@ static void r300_clear(struct pipe_context* pipe, uint32_t width = fb->width; uint32_t height = fb->height; boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); - uint32_t hyperz_dcv = 0; + uint32_t hyperz_dcv = hyperz->zb_depthclearvalue; /* Enable fast Z clear. * The zbuffer must be in micro-tiled mode, otherwise it locks up. */ diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index e62a33daeb..8abc65a2b8 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -796,7 +796,7 @@ static void r300->z_compression, r300->z_fastfill, tex->zmask_mem[level] ? tex->zmask_mem[level]->ofs : 0xdeadbeef); } - + /* Polygon offset depends on the zbuffer bit depth. */ if (r300->zbuffer_bpp != zbuffer_bpp) { r300->zbuffer_bpp = zbuffer_bpp; -- cgit v1.2.3 From bac59b336b7910be8040a3bf98be628775a67af6 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 13 Aug 2010 08:16:59 +0200 Subject: r300g: do not support separate depth/stencil clear in the driver It doesn't work well with Hyper-Z, so put the burden on the state tracker. --- src/gallium/drivers/r300/r300_screen.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index d1b8e9eed4..1e4edcdbc3 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -124,6 +124,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_INDEP_BLEND_FUNC: case PIPE_CAP_DEPTH_CLAMP: /* XXX implemented, but breaks Regnum Online */ + case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: return 0; /* Texturing. */ @@ -150,9 +151,6 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_MAX_CONST_BUFFER_SIZE: return 256; - case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: - return 1; - /* Fragment coordinate conventions. */ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: -- cgit v1.2.3 From 0d699e8ee930c6c7e0f8abff14bf37e7f67807fe Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 14 Aug 2010 22:35:23 +0200 Subject: r300g: separate num_cs_end_dwords out from prepare_for_rendering --- src/gallium/drivers/r300/r300_emit.c | 11 ++++++++++ src/gallium/drivers/r300/r300_emit.h | 1 + src/gallium/drivers/r300/r300_render.c | 37 ++++++++++++---------------------- 3 files changed, 25 insertions(+), 24 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 98958d1a2e..ccf360b6ea 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -1209,6 +1209,17 @@ unsigned r300_get_num_dirty_dwords(struct r300_context *r300) return dwords; } +unsigned r300_get_num_cs_end_dwords(struct r300_context *r300) +{ + unsigned dwords = 0; + + /* Emitted in flush. */ + dwords += 26; /* emit_query_end */ + dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */ + + return dwords; +} + /* Emit all dirty state. */ void r300_emit_dirty_state(struct r300_context* r300) { diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 2f2c2f2dcb..bae2525634 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -116,6 +116,7 @@ void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state); void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state); unsigned r300_get_num_dirty_dwords(struct r300_context *r300); +unsigned r300_get_num_cs_end_dwords(struct r300_context *r300); /* Emit all dirty state. */ void r300_emit_dirty_state(struct r300_context* r300); diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index f2ff65b261..86b11ca045 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -186,20 +186,14 @@ enum r300_prepare_flags { * \param cs_dwords The number of dwords to reserve in CS. * \param aos_offset The offset passed to emit_aos. * \param index_bias The index bias to emit. - * \param end_cs_dwords The number of free dwords which must be available - * at the end of CS after drawing in case the CS space - * management is performed by a draw_* function manually. - * The parameter may be NULL. */ static void r300_prepare_for_rendering(struct r300_context *r300, enum r300_prepare_flags flags, struct pipe_resource *index_buffer, unsigned cs_dwords, int aos_offset, - int index_bias, - unsigned *end_cs_dwords) + int index_bias) { - unsigned end_dwords = 0; boolean flushed = FALSE; boolean first_draw = flags & PREP_FIRST_DRAW; boolean emit_aos = flags & PREP_EMIT_AOS; @@ -221,11 +215,7 @@ static void r300_prepare_for_rendering(struct r300_context *r300, cs_dwords += 7; /* emit_aos_swtcl */ } - /* Emitted in flush. */ - end_dwords += 26; /* emit_query_end */ - end_dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */ - - cs_dwords += end_dwords; + cs_dwords += r300_get_num_cs_end_dwords(r300); /* Reserve requested CS space. */ if (cs_dwords > (r300->cs->ndw - r300->cs->cdw)) { @@ -250,9 +240,6 @@ static void r300_prepare_for_rendering(struct r300_context *r300, if (emit_aos_swtcl) r300_emit_aos_swtcl(r300, indexed); } - - if (end_cs_dwords) - *end_cs_dwords = end_dwords; } static boolean immd_is_good_idea(struct r300_context *r300, @@ -353,7 +340,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, dwords = 9 + count * vertex_size; - r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL); + r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0); BEGIN_CS(dwords); OUT_CS_REG(R300_GA_COLOR_CONTROL, @@ -533,7 +520,7 @@ static void r300_draw_range_elements(struct pipe_context* pipe, /* 15 dwords for emit_draw_elements */ r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED, - indexBuffer, 15, buffer_offset, indexBias, NULL); + indexBuffer, 15, buffer_offset, indexBias); if (alt_num_verts || count <= 65535) { r300_emit_draw_elements(r300, indexBuffer, indexSize, @@ -552,7 +539,7 @@ static void r300_draw_range_elements(struct pipe_context* pipe, if (count) { r300_prepare_for_rendering(r300, PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED, - indexBuffer, 15, buffer_offset, indexBias, NULL); + indexBuffer, 15, buffer_offset, indexBias); } } while (count); } @@ -597,7 +584,7 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, } else { /* 9 spare dwords for emit_draw_arrays. */ r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS, - NULL, 9, start, 0, NULL); + NULL, 9, start, 0); if (alt_num_verts || count <= 65535) { r300_emit_draw_arrays(r300, mode, count); @@ -613,7 +600,7 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, if (count) { r300_prepare_for_rendering(r300, PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9, - start, 0, NULL); + start, 0); } } while (count); } @@ -854,7 +841,7 @@ static void r300_render_draw_arrays(struct vbuf_render* render, (void) i; (void) ptr; r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL, - NULL, dwords, 0, 0, NULL); + NULL, dwords, 0, 0); DBG(r300, DBG_DRAW, "r300: render_draw_arrays (count: %d)\n", count); @@ -907,7 +894,8 @@ static void r300_render_draw_elements(struct vbuf_render* render, * indices than it can fit in CS. */ r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, - NULL, 256, 0, 0, &end_cs_dwords); + NULL, 256, 0, 0); + end_cs_dwords = r300_get_num_cs_end_dwords(r300); while (count) { free_dwords = r300->cs->ndw - r300->cs->cdw; @@ -937,7 +925,8 @@ static void r300_render_draw_elements(struct vbuf_render* render, if (count) { r300_prepare_for_rendering(r300, PREP_EMIT_AOS_SWTCL | PREP_INDEXED, - NULL, 256, 0, 0, &end_cs_dwords); + NULL, 256, 0, 0); + end_cs_dwords = r300_get_num_cs_end_dwords(r300); } } } @@ -1031,7 +1020,7 @@ static void r300_blitter_draw_rectangle(struct blitter_context *blitter, r300->clip_state.dirty = FALSE; r300->viewport_state.dirty = FALSE; - r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL); + r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0); DBG(r300, DBG_DRAW, "r300: draw_rectangle\n"); -- cgit v1.2.3 From 5f8ccf1e2766e3e7537dd8a838837c5e4af4246c Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 14 Aug 2010 18:37:04 +0200 Subject: r300g: do not use fastfill if ZMask RAM is not properly initialized z_fastfill -> dirty_zmask[level]. --- src/gallium/drivers/r300/r300_blit.c | 16 ++++++++++++---- src/gallium/drivers/r300/r300_context.h | 2 -- src/gallium/drivers/r300/r300_hyperz.c | 27 ++++++++++++++++++--------- src/gallium/drivers/r300/r300_state.c | 4 +--- 4 files changed, 31 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 5fe9b9017d..d44f53ec6f 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -99,9 +99,6 @@ static boolean r300_cbzb_clear_allowed(struct r300_context *r300, struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - if (r300->z_fastfill) - clear_buffers &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL); - /* Only color clear allowed, and only one colorbuffer. */ if (clear_buffers != PIPE_CLEAR_COLOR || fb->nr_cbufs != 1) return FALSE; @@ -186,8 +183,10 @@ static void r300_clear(struct pipe_context* pipe, r300_depth_clear_value(fb->zsbuf->format, depth, stencil); r300_mark_fb_state_dirty(r300, R300_CHANGED_ZCLEAR_FLAG); - if (r300->z_compression || r300->z_fastfill) + if (r300_texture(fb->zsbuf->texture)->zmask_mem[fb->zsbuf->level]) { r300->zmask_clear.dirty = TRUE; + buffers &= ~PIPE_CLEAR_DEPTHSTENCIL; + } if (r300->hiz_enable) r300->hiz_clear.dirty = TRUE; } @@ -222,6 +221,15 @@ static void r300_clear(struct pipe_context* pipe, r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); } + /* Enable fastfill. + * + * If we cleared the zmask, it's dirty now. The Hyper-Z state update + * looks for a dirty zmask and enables fastfill accordingly. */ + if (fb->zsbuf && + r300_texture(fb->zsbuf->texture)->dirty_zmask[fb->zsbuf->level]) { + r300->hyperz_state.dirty = TRUE; + } + /* XXX this flush "fixes" a hardlock in the cubestorm xscreensaver */ if (r300->flush_counter == 0) pipe->flush(pipe, 0, NULL); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 8b772f3887..98e6d358e9 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -565,8 +565,6 @@ struct r300_context { boolean two_sided_color; /* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */ boolean incompatible_vb_layout; - /* Whether fast zclear is enabled. */ - boolean z_fastfill; #define R300_Z_COMPRESS_44 1 #define RV350_Z_COMPRESS_88 2 int z_compression; diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index 3b0adc3584..1f6244c49e 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -129,6 +129,9 @@ static void r300_update_hyperz(struct r300_context* r300) { struct r300_hyperz_state *z = (struct r300_hyperz_state*)r300->hyperz_state.state; + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + boolean dirty_zmask = FALSE; z->gb_z_peq_config = 0; z->zb_bw_cntl = 0; @@ -140,23 +143,29 @@ static void r300_update_hyperz(struct r300_context* r300) return; } + if (!fb->zsbuf) + return; + if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) return; + dirty_zmask = r300_texture(fb->zsbuf->texture)->dirty_zmask[fb->zsbuf->level]; + + /* Z fastfill. */ + if (dirty_zmask) { + z->zb_bw_cntl |= R300_FAST_FILL_ENABLE; /* | R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE;*/ + } + /* Zbuffer compression. */ - if (r300->z_compression) { + if (dirty_zmask && r300->z_compression) { z->zb_bw_cntl |= R300_RD_COMP_ENABLE; if (r300->z_decomp_rd == false) z->zb_bw_cntl |= R300_WR_COMP_ENABLE; - /* RV350 and up optimizations. */ - if (r300->z_compression == RV350_Z_COMPRESS_88) - z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; - } - - /* Z fastfill. */ - if (r300->z_fastfill) { - z->zb_bw_cntl |= R300_FAST_FILL_ENABLE; /* | R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE;*/ } + /* RV350 and up optimizations. */ + /* The section 10.4.9 in the docs is a lie. */ + if (r300->z_compression == RV350_Z_COMPRESS_88) + z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; if (r300->hiz_enable) { bool can_hiz = r300_can_hiz(r300); diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 8abc65a2b8..9afaa5f651 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -753,7 +753,6 @@ static void r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE); r300->hiz_enable = false; - r300->z_fastfill = false; r300->z_compression = false; if (state->zsbuf) { @@ -785,7 +784,6 @@ static void } if (tex->zmask_mem[level]) { - r300->z_fastfill = 1; /* compression causes hangs on 16-bit */ if (zbuffer_bpp == 24) r300->z_compression = compress; @@ -793,7 +791,7 @@ static void DBG(r300, DBG_HYPERZ, "hyper-z features: hiz: %d @ %08x z-compression: %d z-fastfill: %d @ %08x\n", r300->hiz_enable, tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef, - r300->z_compression, r300->z_fastfill, + r300->z_compression, tex->zmask_mem[level] ? 1 : 0, tex->zmask_mem[level] ? tex->zmask_mem[level]->ofs : 0xdeadbeef); } -- cgit v1.2.3 From 516152112ea2a8524865d230f657977583246092 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 13 Aug 2010 04:57:30 +0200 Subject: r300g: do not clear with blitter if we clear just the ZMask RAM This skips the blitter clear path entirely if the color is not cleared and the depth+stencil is cleared with the ZMask. --- src/gallium/drivers/r300/r300_blit.c | 46 +++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index d44f53ec6f..00756c771c 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -21,6 +21,8 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r300_context.h" +#include "r300_emit.h" +#include "r300_hyperz.h" #include "r300_texture.h" #include "r300_winsys.h" @@ -177,8 +179,7 @@ static void r300_clear(struct pipe_context* pipe, /* Enable fast Z clear. * The zbuffer must be in micro-tiled mode, otherwise it locks up. */ - if ((buffers & (PIPE_CLEAR_DEPTH|PIPE_CLEAR_STENCIL)) && has_hyperz) { - + if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && has_hyperz) { hyperz_dcv = hyperz->zb_depthclearvalue = r300_depth_clear_value(fb->zsbuf->format, depth, stencil); @@ -206,13 +207,40 @@ static void r300_clear(struct pipe_context* pipe, } /* Clear. */ - r300_blitter_begin(r300, R300_CLEAR); - util_blitter_clear(r300->blitter, - width, - height, - fb->nr_cbufs, - buffers, rgba, depth, stencil); - r300_blitter_end(r300); + if (buffers) { + /* Clear using the blitter. */ + r300_blitter_begin(r300, R300_CLEAR); + util_blitter_clear(r300->blitter, + width, + height, + fb->nr_cbufs, + buffers, rgba, depth, stencil); + r300_blitter_end(r300); + } else if (r300->zmask_clear.dirty) { + /* Just clear zmask and hiz now, this does not use a standard draw + * procedure. */ + unsigned dwords; + + /* Calculate zmask_clear and hiz_clear atom sizes. */ + r300_update_hyperz_state(r300); + dwords = r300->zmask_clear.size + + (r300->hiz_clear.dirty ? r300->hiz_clear.size : 0) + + r300_get_num_cs_end_dwords(r300); + + /* Reserve CS space. */ + if (dwords > (r300->cs->ndw - r300->cs->cdw)) { + r300->context.flush(&r300->context, 0, NULL); + } + + /* Emit clear packets. */ + r300_emit_zmask_clear(r300, r300->zmask_clear.size, + r300->zmask_clear.state); + if (r300->hiz_clear.dirty) + r300_emit_hiz_clear(r300, r300->hiz_clear.size, + r300->hiz_clear.state); + } else { + assert(0); + } /* Disable CBZB clear. */ if (r300->cbzb_clear) { -- cgit v1.2.3 From 59c2230879cb5149ce99ac8565ce6af9c5b02e04 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 15 Aug 2010 02:18:15 +0200 Subject: r300g: rename dirty_zmask -> zmask_in_use --- src/gallium/drivers/r300/r300_blit.c | 10 +++++----- src/gallium/drivers/r300/r300_context.h | 2 +- src/gallium/drivers/r300/r300_emit.c | 4 ++-- src/gallium/drivers/r300/r300_hyperz.c | 8 ++++---- src/gallium/drivers/r300/r300_state_derived.c | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 00756c771c..29393109fa 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -251,10 +251,10 @@ static void r300_clear(struct pipe_context* pipe, /* Enable fastfill. * - * If we cleared the zmask, it's dirty now. The Hyper-Z state update - * looks for a dirty zmask and enables fastfill accordingly. */ + * If we cleared the zmask, it's in use now. The Hyper-Z state update + * looks if zmask is in use and enables fastfill accordingly. */ if (fb->zsbuf && - r300_texture(fb->zsbuf->texture)->dirty_zmask[fb->zsbuf->level]) { + r300_texture(fb->zsbuf->texture)->zmask_in_use[fb->zsbuf->level]) { r300->hyperz_state.dirty = TRUE; } @@ -307,7 +307,7 @@ void r300_flush_depth_stencil(struct pipe_context *pipe, if (!tex->zmask_mem[subdst.level]) return; - if (!tex->dirty_zmask[subdst.level]) + if (!tex->zmask_in_use[subdst.level]) return; dstsurf = pipe->screen->get_tex_surface(pipe->screen, dst, @@ -319,7 +319,7 @@ void r300_flush_depth_stencil(struct pipe_context *pipe, r300_blitter_end(r300); r300->z_decomp_rd = FALSE; - tex->dirty_zmask[subdst.level] = FALSE; + tex->zmask_in_use[subdst.level] = FALSE; } /* Copy a block of pixels from one surface to another using HW. */ diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 98e6d358e9..6c4c396982 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -397,7 +397,7 @@ struct r300_texture { /* hyper-z memory allocs */ struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS]; struct mem_block *zmask_mem[R300_MAX_TEXTURE_LEVELS]; - boolean dirty_zmask[R300_MAX_TEXTURE_LEVELS]; + boolean zmask_in_use[R300_MAX_TEXTURE_LEVELS]; /* This is the level tiling flags were last time set for. * It's used to prevent redundant tiling-flags changes from happening.*/ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index ccf360b6ea..fe334c924e 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -1086,8 +1086,8 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state r300_emit_zmask_line_clear(r300, offset, stride, 0x0);//0xffffffff); } - /* Mark the current zbuffer's zmask as dirty. */ - tex->dirty_zmask[fb->zsbuf->level] = TRUE; + /* Mark the current zbuffer's zmask as in use. */ + tex->zmask_in_use[fb->zsbuf->level] = TRUE; } void r300_emit_ztop_state(struct r300_context* r300, diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index 1f6244c49e..9cce195eb4 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -131,7 +131,7 @@ static void r300_update_hyperz(struct r300_context* r300) (struct r300_hyperz_state*)r300->hyperz_state.state; struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - boolean dirty_zmask = FALSE; + boolean zmask_in_use = FALSE; z->gb_z_peq_config = 0; z->zb_bw_cntl = 0; @@ -149,15 +149,15 @@ static void r300_update_hyperz(struct r300_context* r300) if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) return; - dirty_zmask = r300_texture(fb->zsbuf->texture)->dirty_zmask[fb->zsbuf->level]; + zmask_in_use = r300_texture(fb->zsbuf->texture)->zmask_in_use[fb->zsbuf->level]; /* Z fastfill. */ - if (dirty_zmask) { + if (zmask_in_use) { z->zb_bw_cntl |= R300_FAST_FILL_ENABLE; /* | R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE;*/ } /* Zbuffer compression. */ - if (dirty_zmask && r300->z_compression) { + if (zmask_in_use && r300->z_compression) { z->zb_bw_cntl |= R300_RD_COMP_ENABLE; if (r300->z_decomp_rd == false) z->zb_bw_cntl |= R300_WR_COMP_ENABLE; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index c047a127ba..4a63ed7fc1 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -738,7 +738,7 @@ static void r300_flush_depth_textures(struct r300_context *r300) continue; for (level = 0; level <= tex->last_level; level++) - if (r300_texture(tex)->dirty_zmask[level]) { + if (r300_texture(tex)->zmask_in_use[level]) { /* We don't handle 3D textures and cubemaps yet. */ r300_flush_depth_stencil(&r300->context, tex, u_subresource(0, level), 0); -- cgit v1.2.3 From d5a86f9fc9fc96a0d771c0409b557636ef89f350 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 15 Aug 2010 03:14:25 +0200 Subject: r300g: do not use HiZ if HiZ RAM is not properly initialized --- src/gallium/drivers/r300/r300_blit.c | 17 ++++++++++------- src/gallium/drivers/r300/r300_context.h | 2 +- src/gallium/drivers/r300/r300_emit.c | 3 +++ src/gallium/drivers/r300/r300_hyperz.c | 12 ++++++++---- src/gallium/drivers/r300/r300_state.c | 7 +------ 5 files changed, 23 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 29393109fa..559936cec6 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -172,6 +172,8 @@ static void r300_clear(struct pipe_context* pipe, (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_hyperz_state *hyperz = (struct r300_hyperz_state*)r300->hyperz_state.state; + struct r300_texture *zstex = + fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL; uint32_t width = fb->width; uint32_t height = fb->height; boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); @@ -184,11 +186,11 @@ static void r300_clear(struct pipe_context* pipe, r300_depth_clear_value(fb->zsbuf->format, depth, stencil); r300_mark_fb_state_dirty(r300, R300_CHANGED_ZCLEAR_FLAG); - if (r300_texture(fb->zsbuf->texture)->zmask_mem[fb->zsbuf->level]) { + if (zstex->zmask_mem[fb->zsbuf->level]) { r300->zmask_clear.dirty = TRUE; buffers &= ~PIPE_CLEAR_DEPTHSTENCIL; } - if (r300->hiz_enable) + if (zstex->hiz_mem[fb->zsbuf->level]) r300->hiz_clear.dirty = TRUE; } @@ -249,12 +251,13 @@ static void r300_clear(struct pipe_context* pipe, r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); } - /* Enable fastfill. + /* Enable fastfill and/or hiz. * - * If we cleared the zmask, it's in use now. The Hyper-Z state update - * looks if zmask is in use and enables fastfill accordingly. */ - if (fb->zsbuf && - r300_texture(fb->zsbuf->texture)->zmask_in_use[fb->zsbuf->level]) { + * If we cleared zmask/hiz, it's in use now. The Hyper-Z state update + * looks if zmask/hiz is in use and enables fastfill accordingly. */ + if (zstex && + (zstex->zmask_in_use[fb->zsbuf->level] || + zstex->hiz_in_use[fb->zsbuf->level])) { r300->hyperz_state.dirty = TRUE; } diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 6c4c396982..6fa7f470f9 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -398,6 +398,7 @@ struct r300_texture { struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS]; struct mem_block *zmask_mem[R300_MAX_TEXTURE_LEVELS]; boolean zmask_in_use[R300_MAX_TEXTURE_LEVELS]; + boolean hiz_in_use[R300_MAX_TEXTURE_LEVELS]; /* This is the level tiling flags were last time set for. * It's used to prevent redundant tiling-flags changes from happening.*/ @@ -568,7 +569,6 @@ struct r300_context { #define R300_Z_COMPRESS_44 1 #define RV350_Z_COMPRESS_88 2 int z_compression; - boolean hiz_enable; boolean cbzb_clear; boolean z_decomp_rd; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index fe334c924e..d0fd45349e 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -1046,6 +1046,9 @@ void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) r300_emit_hiz_line_clear(r300, offset, stride, 0xffffffff); } z->current_func = -1; + + /* Mark the current zbuffer's hiz ram as in use. */ + tex->hiz_in_use[fb->zsbuf->level] = TRUE; } void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state) diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index 9cce195eb4..811b5646e1 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -131,7 +131,10 @@ static void r300_update_hyperz(struct r300_context* r300) (struct r300_hyperz_state*)r300->hyperz_state.state; struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_texture *zstex = + fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL; boolean zmask_in_use = FALSE; + boolean hiz_in_use = FALSE; z->gb_z_peq_config = 0; z->zb_bw_cntl = 0; @@ -143,13 +146,14 @@ static void r300_update_hyperz(struct r300_context* r300) return; } - if (!fb->zsbuf) + if (!zstex) return; if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) return; - zmask_in_use = r300_texture(fb->zsbuf->texture)->zmask_in_use[fb->zsbuf->level]; + zmask_in_use = zstex->zmask_in_use[fb->zsbuf->level]; + hiz_in_use = zstex->hiz_in_use[fb->zsbuf->level]; /* Z fastfill. */ if (zmask_in_use) { @@ -167,7 +171,7 @@ static void r300_update_hyperz(struct r300_context* r300) if (r300->z_compression == RV350_Z_COMPRESS_88) z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; - if (r300->hiz_enable) { + if (hiz_in_use) { bool can_hiz = r300_can_hiz(r300); if (can_hiz) { z->zb_bw_cntl |= R300_HIZ_ENABLE; @@ -177,8 +181,8 @@ static void r300_update_hyperz(struct r300_context* r300) } } + /* R500-specific features and optimizations. */ if (r300->screen->caps.is_r500) { - /* XXX Are these bits really available on RV350? */ z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3; z->zb_bw_cntl |= R500_HIZ_EQUAL_REJECT_ENABLE | diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 9afaa5f651..239edd98e3 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -752,7 +752,6 @@ static void r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE); - r300->hiz_enable = false; r300->z_compression = false; if (state->zsbuf) { @@ -779,17 +778,13 @@ static void /* work out whether we can support zmask features on this buffer */ r300_zmask_alloc_block(r300, zs_surf, compress); - if (tex->hiz_mem[level]) { - r300->hiz_enable = 1; - } - if (tex->zmask_mem[level]) { /* compression causes hangs on 16-bit */ if (zbuffer_bpp == 24) r300->z_compression = compress; } DBG(r300, DBG_HYPERZ, - "hyper-z features: hiz: %d @ %08x z-compression: %d z-fastfill: %d @ %08x\n", r300->hiz_enable, + "hyper-z features: hiz: %d @ %08x z-compression: %d z-fastfill: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0, tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef, r300->z_compression, tex->zmask_mem[level] ? 1 : 0, tex->zmask_mem[level] ? tex->zmask_mem[level]->ofs : 0xdeadbeef); -- cgit v1.2.3 From 6dfcff6b05f44ad4e338fac18ccd4d470c953b1e Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 15 Aug 2010 09:13:50 +0200 Subject: r300g: mark HiZ/ZMask_clear atoms as non-dirty after emission in clear --- src/gallium/drivers/r300/r300_blit.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 559936cec6..47ffc0cb3c 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -237,9 +237,12 @@ static void r300_clear(struct pipe_context* pipe, /* Emit clear packets. */ r300_emit_zmask_clear(r300, r300->zmask_clear.size, r300->zmask_clear.state); - if (r300->hiz_clear.dirty) + r300->zmask_clear.dirty = FALSE; + if (r300->hiz_clear.dirty) { r300_emit_hiz_clear(r300, r300->hiz_clear.size, r300->hiz_clear.state); + r300->hiz_clear.dirty = FALSE; + } } else { assert(0); } -- cgit v1.2.3 From 510b03539413552a543e25de6b896eb10baf60ae Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 15 Aug 2010 16:21:46 +0100 Subject: llvmpipe: reorganize block4 loop, nice speedup isosurf 95->115 fps just by exchanging the two inner loops in this function... --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 15 +++++++++++++++ src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 16 ++++------------ 2 files changed, 19 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index ebe9a8e92b..c1f2680ddc 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -113,6 +113,21 @@ block_full_16(struct lp_rasterizer_task *task, block_full_4(task, tri, x + ix, y + iy); } + +static INLINE unsigned +build_mask(int c, const int *step) +{ + int mask = 0; + int i; + + for (i = 0; i < 16; i++) { + mask |= ((c + step[i]) >> 31) & (1 << i); + } + + return mask; +} + + #define TAG(x) x##_1 #define NR_PLANES 1 #include "lp_rast_tri_tmp.h" diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index a410c611a3..fcb8e2b05d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -46,19 +46,11 @@ TAG(do_block_4)(struct lp_rasterizer_task *task, int x, int y, const int *c) { - unsigned mask = 0; - int i; + unsigned mask = 0xffff; + int j; - for (i = 0; i < 16; i++) { - int any_negative = 0; - int j; - - for (j = 0; j < NR_PLANES; j++) - any_negative |= (c[j] - 1 + plane[j].step[i]); - - any_negative >>= 31; - - mask |= (~any_negative) & (1 << i); + for (j = 0; j < NR_PLANES; j++) { + mask &= ~build_mask(c[j] - 1, plane[j].step); } /* Now pass to the shader: -- cgit v1.2.3 From 515194968d033d2c0c5678677f7606d38635d747 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 15 Aug 2010 16:32:45 +0100 Subject: llvmpipe: version of block4 which doesn't need the full step array No noticable slowdown with isosurf. --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 30 ++++++++++++++++++++------ src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 4 +++- 2 files changed, 27 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index c1f2680ddc..28a0446172 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -115,19 +115,37 @@ block_full_16(struct lp_rasterizer_task *task, static INLINE unsigned -build_mask(int c, const int *step) +build_mask(int c, int dcdx, int dcdy) { int mask = 0; - int i; - for (i = 0; i < 16; i++) { - mask |= ((c + step[i]) >> 31) & (1 << i); - } - + int c0 = c; + int c1 = c0 + dcdx; + int c2 = c1 + dcdx; + int c3 = c2 + dcdx; + + mask |= ((c0 + 0 * dcdy) >> 31) & (1 << 0); + mask |= ((c0 + 1 * dcdy) >> 31) & (1 << 2); + mask |= ((c0 + 2 * dcdy) >> 31) & (1 << 8); + mask |= ((c0 + 3 * dcdy) >> 31) & (1 << 10); + mask |= ((c1 + 0 * dcdy) >> 31) & (1 << 1); + mask |= ((c1 + 1 * dcdy) >> 31) & (1 << 3); + mask |= ((c1 + 2 * dcdy) >> 31) & (1 << 9); + mask |= ((c1 + 3 * dcdy) >> 31) & (1 << 11); + mask |= ((c2 + 0 * dcdy) >> 31) & (1 << 4); + mask |= ((c2 + 1 * dcdy) >> 31) & (1 << 6); + mask |= ((c2 + 2 * dcdy) >> 31) & (1 << 12); + mask |= ((c2 + 3 * dcdy) >> 31) & (1 << 14); + mask |= ((c3 + 0 * dcdy) >> 31) & (1 << 5); + mask |= ((c3 + 1 * dcdy) >> 31) & (1 << 7); + mask |= ((c3 + 2 * dcdy) >> 31) & (1 << 13); + mask |= ((c3 + 3 * dcdy) >> 31) & (1 << 15); + return mask; } + #define TAG(x) x##_1 #define NR_PLANES 1 #include "lp_rast_tri_tmp.h" diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index fcb8e2b05d..e14886f0b9 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -50,7 +50,9 @@ TAG(do_block_4)(struct lp_rasterizer_task *task, int j; for (j = 0; j < NR_PLANES; j++) { - mask &= ~build_mask(c[j] - 1, plane[j].step); + mask &= ~build_mask(c[j] - 1, + plane[j].step[1], + plane[j].step[2]); } /* Now pass to the shader: -- cgit v1.2.3 From 4b322e71bb169af637864922edfb4108675781bb Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 15 Aug 2010 16:49:26 +0100 Subject: llvmpipe: also use build_mask at 16, 64 pixel levels --- src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 47 +++++++++++--------------- 1 file changed, 19 insertions(+), 28 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index e14886f0b9..6d0be76713 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -73,24 +73,19 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, const int *c) { unsigned outmask, inmask, partmask, partial_mask; - unsigned i, j; + unsigned j; outmask = 0; /* outside one or more trivial reject planes */ partmask = 0; /* outside one or more trivial accept planes */ for (j = 0; j < NR_PLANES; j++) { - const int *step = plane[j].step; - const int eo = plane[j].eo * 4; - const int ei = plane[j].ei * 4; - const int cox = c[j] + eo; - const int cio = ei - 1 - eo; - - for (i = 0; i < 16; i++) { - int out = cox + step[i] * 4; - int part = out + cio; - outmask |= (out >> 31) & (1 << i); - partmask |= (part >> 31) & (1 << i); - } + const int dcdx = plane[j].step[1] * 4; + const int dcdy = plane[j].step[2] * 4; + const int cox = c[j] + plane[j].eo * 4; + const int cio = c[j] + plane[j].ei * 4 - 1; + + outmask |= build_mask(cox, dcdx, dcdy); + partmask |= build_mask(cio, dcdx, dcdy); } if (outmask == 0xffff) @@ -151,7 +146,7 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, struct lp_rast_plane plane[NR_PLANES]; int c[NR_PLANES]; unsigned outmask, inmask, partmask, partial_mask; - unsigned i, j, nr_planes = 0; + unsigned j, nr_planes = 0; while (plane_mask) { int i = ffs(plane_mask) - 1; @@ -165,21 +160,17 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, partmask = 0; /* outside one or more trivial accept planes */ for (j = 0; j < NR_PLANES; j++) { - const int *step = plane[j].step; - const int eo = plane[j].eo * 16; - const int ei = plane[j].ei * 16; - int cox, cio; - c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; - cox = c[j] + eo; - cio = ei - 1 - eo; - - for (i = 0; i < 16; i++) { - int out = cox + step[i] * 16; - int part = out + cio; - outmask |= (out >> 31) & (1 << i); - partmask |= (part >> 31) & (1 << i); - } + } + + for (j = 0; j < NR_PLANES; j++) { + const int dcdx = plane[j].step[1] * 16; + const int dcdy = plane[j].step[2] * 16; + const int cox = c[j] + plane[j].eo * 16; + const int cio = c[j] + plane[j].ei * 16 - 1; + + outmask |= build_mask(cox, dcdx, dcdy); + partmask |= build_mask(cio, dcdx, dcdy); } if (outmask == 0xffff) -- cgit v1.2.3 From ee0d1c29eeddfa364a18783507acd4d031029ba2 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 15 Aug 2010 17:22:25 +0100 Subject: llvmpipe: don't refer to plane->step when dcdx or dcdy would do --- src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index 6d0be76713..905f3df213 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -51,8 +51,8 @@ TAG(do_block_4)(struct lp_rasterizer_task *task, for (j = 0; j < NR_PLANES; j++) { mask &= ~build_mask(c[j] - 1, - plane[j].step[1], - plane[j].step[2]); + -plane[j].dcdx, + plane[j].dcdy); } /* Now pass to the shader: @@ -79,8 +79,8 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, partmask = 0; /* outside one or more trivial accept planes */ for (j = 0; j < NR_PLANES; j++) { - const int dcdx = plane[j].step[1] * 4; - const int dcdy = plane[j].step[2] * 4; + const int dcdx = -plane[j].dcdx * 4; + const int dcdy = plane[j].dcdy * 4; const int cox = c[j] + plane[j].eo * 4; const int cio = c[j] + plane[j].ei * 4 - 1; @@ -164,8 +164,8 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, } for (j = 0; j < NR_PLANES; j++) { - const int dcdx = plane[j].step[1] * 16; - const int dcdy = plane[j].step[2] * 16; + const int dcdx = -plane[j].dcdx * 16; + const int dcdy = plane[j].dcdy * 16; const int cox = c[j] + plane[j].eo * 16; const int cio = c[j] + plane[j].ei * 16 - 1; -- cgit v1.2.3 From 4c0641454b952f2c240de8c83511703f98e1f72f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 15 Aug 2010 17:24:54 +0100 Subject: llvmpipe: eliminate last usage of step array in rast_tmp.h For 16 and 64 pixel levels, calculate a mask which is linear in x and y (ie not in the swizzle layout). When iterating over full and partial masks, figure out position by manipulating the bit number set in the mask, rather than relying on postion arrays. Similarly, calculate the lower-level c values from dcdx, dcdy and the position rather than relying on the step array. --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 29 +++++++++++++++++ src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 44 ++++++++++++++++---------- 2 files changed, 57 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 28a0446172..8ecd0567df 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -144,6 +144,35 @@ build_mask(int c, int dcdx, int dcdy) return mask; } +static INLINE unsigned +build_mask_linear(int c, int dcdx, int dcdy) +{ + int mask = 0; + + int c0 = c; + int c1 = c0 + dcdy; + int c2 = c1 + dcdy; + int c3 = c2 + dcdy; + + mask |= ((c0 + 0 * dcdx) >> 31) & (1 << 0); + mask |= ((c0 + 1 * dcdx) >> 31) & (1 << 1); + mask |= ((c0 + 2 * dcdx) >> 31) & (1 << 2); + mask |= ((c0 + 3 * dcdx) >> 31) & (1 << 3); + mask |= ((c1 + 0 * dcdx) >> 31) & (1 << 4); + mask |= ((c1 + 1 * dcdx) >> 31) & (1 << 5); + mask |= ((c1 + 2 * dcdx) >> 31) & (1 << 6); + mask |= ((c1 + 3 * dcdx) >> 31) & (1 << 7); + mask |= ((c2 + 0 * dcdx) >> 31) & (1 << 8); + mask |= ((c2 + 1 * dcdx) >> 31) & (1 << 9); + mask |= ((c2 + 2 * dcdx) >> 31) & (1 << 10); + mask |= ((c2 + 3 * dcdx) >> 31) & (1 << 11); + mask |= ((c3 + 0 * dcdx) >> 31) & (1 << 12); + mask |= ((c3 + 1 * dcdx) >> 31) & (1 << 13); + mask |= ((c3 + 2 * dcdx) >> 31) & (1 << 14); + mask |= ((c3 + 3 * dcdx) >> 31) & (1 << 15); + + return mask; +} #define TAG(x) x##_1 diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index 905f3df213..98ebcfa870 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -84,8 +84,8 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, const int cox = c[j] + plane[j].eo * 4; const int cio = c[j] + plane[j].ei * 4 - 1; - outmask |= build_mask(cox, dcdx, dcdy); - partmask |= build_mask(cio, dcdx, dcdy); + outmask |= build_mask_linear(cox, dcdx, dcdy); + partmask |= build_mask_linear(cio, dcdx, dcdy); } if (outmask == 0xffff) @@ -106,15 +106,19 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, */ while (partial_mask) { int i = ffs(partial_mask) - 1; - int px = x + pos_table4[i][0]; - int py = y + pos_table4[i][1]; + int ix = (i & 3) * 4; + int iy = (i >> 2) * 4; + int px = x + ix; + int py = y + iy; int cx[NR_PLANES]; - for (j = 0; j < NR_PLANES; j++) - cx[j] = c[j] + plane[j].step[i] * 4; - partial_mask &= ~(1 << i); + for (j = 0; j < NR_PLANES; j++) + cx[j] = (c[j] + - plane[j].dcdx * ix + + plane[j].dcdy * iy); + TAG(do_block_4)(task, tri, plane, px, py, cx); } @@ -122,8 +126,10 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, */ while (inmask) { int i = ffs(inmask) - 1; - int px = x + pos_table4[i][0]; - int py = y + pos_table4[i][1]; + int ix = (i & 3) * 4; + int iy = (i >> 2) * 4; + int px = x + ix; + int py = y + iy; inmask &= ~(1 << i); @@ -169,8 +175,8 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, const int cox = c[j] + plane[j].eo * 16; const int cio = c[j] + plane[j].ei * 16 - 1; - outmask |= build_mask(cox, dcdx, dcdy); - partmask |= build_mask(cio, dcdx, dcdy); + outmask |= build_mask_linear(cox, dcdx, dcdy); + partmask |= build_mask_linear(cio, dcdx, dcdy); } if (outmask == 0xffff) @@ -191,12 +197,16 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, */ while (partial_mask) { int i = ffs(partial_mask) - 1; - int px = x + pos_table16[i][0]; - int py = y + pos_table16[i][1]; + int ix = (i & 3) * 16; + int iy = (i >> 2) * 16; + int px = x + ix; + int py = y + iy; int cx[NR_PLANES]; for (j = 0; j < NR_PLANES; j++) - cx[j] = c[j] + plane[j].step[i] * 16; + cx[j] = (c[j] + - plane[j].dcdx * ix + + plane[j].dcdy * iy); partial_mask &= ~(1 << i); @@ -208,8 +218,10 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, */ while (inmask) { int i = ffs(inmask) - 1; - int px = x + pos_table16[i][0]; - int py = y + pos_table16[i][1]; + int ix = (i & 3) * 16; + int iy = (i >> 2) * 16; + int px = x + ix; + int py = y + iy; inmask &= ~(1 << i); -- cgit v1.2.3 From ff26594a92df37608a3efe47e4d4f3a55bcd6bc1 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 15 Aug 2010 17:31:13 +0100 Subject: llvmpipe: remove all traces of step arrays, pos_tables No need to calculate these values any longer, nor to store them in the bin data. Improves isosurf a bit more, 115->123 fps. --- src/gallium/drivers/llvmpipe/lp_rast.h | 5 --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 46 --------------------- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 63 ----------------------------- 3 files changed, 114 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index eaf2a6f334..44319a0ad6 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -104,9 +104,6 @@ struct lp_rast_plane { int dcdx; int dcdy; - - /* edge/step info for 3 edges and 4x4 block of pixels */ - const int *step; }; /** @@ -119,8 +116,6 @@ struct lp_rast_triangle { /* inputs for the shader */ struct lp_rast_shader_inputs inputs; - int step[3][16]; - #ifdef DEBUG float v[3][2]; #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 8ecd0567df..980c18c024 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -37,52 +37,6 @@ #include "lp_tile_soa.h" -/** - * Map an index in [0,15] to an x,y position, multiplied by 4. - * This is used to get the position of each subtile in a 4x4 - * grid of edge step values. - * Note: we can use some bit twiddling to compute these values instead - * of using a look-up table, but there's no measurable performance - * difference. - */ -static const int pos_table4[16][2] = { - { 0, 0 }, - { 4, 0 }, - { 0, 4 }, - { 4, 4 }, - { 8, 0 }, - { 12, 0 }, - { 8, 4 }, - { 12, 4 }, - { 0, 8 }, - { 4, 8 }, - { 0, 12 }, - { 4, 12 }, - { 8, 8 }, - { 12, 8 }, - { 8, 12 }, - { 12, 12 } -}; - - -static const int pos_table16[16][2] = { - { 0, 0 }, - { 16, 0 }, - { 0, 16 }, - { 16, 16 }, - { 32, 0 }, - { 48, 0 }, - { 32, 16 }, - { 48, 16 }, - { 0, 32 }, - { 16, 32 }, - { 0, 48 }, - { 16, 48 }, - { 32, 32 }, - { 48, 32 }, - { 32, 48 }, - { 48, 48 } -}; /** diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 20e63ae51f..393533ebee 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -61,36 +61,6 @@ struct tri_info { -static const int step_scissor_minx[16] = { - 0, 1, 0, 1, - 2, 3, 2, 3, - 0, 1, 0, 1, - 2, 3, 2, 3 -}; - -static const int step_scissor_maxx[16] = { - 0, -1, 0, -1, - -2, -3, -2, -3, - 0, -1, 0, -1, - -2, -3, -2, -3 -}; - -static const int step_scissor_miny[16] = { - 0, 0, 1, 1, - 0, 0, 1, 1, - 2, 2, 3, 3, - 2, 2, 3, 3 -}; - -static const int step_scissor_maxy[16] = { - 0, 0, -1, -1, - 0, 0, -1, -1, - -2, -2, -3, -3, - -2, -2, -3, -3 -}; - - - static INLINE int subpixel_snap(float a) @@ -618,35 +588,6 @@ do_triangle_ccw(struct lp_setup_context *setup, /* Calculate trivial accept offsets from the above. */ plane->ei = plane->dcdy - plane->dcdx - plane->eo; - - plane->step = tri->step[i]; - - /* Fill in the inputs.step[][] arrays. - * We've manually unrolled some loops here. - */ -#define SETUP_STEP(j, x, y) \ - tri->step[i][j] = y * plane->dcdy - x * plane->dcdx - - SETUP_STEP(0, 0, 0); - SETUP_STEP(1, 1, 0); - SETUP_STEP(2, 0, 1); - SETUP_STEP(3, 1, 1); - - SETUP_STEP(4, 2, 0); - SETUP_STEP(5, 3, 0); - SETUP_STEP(6, 2, 1); - SETUP_STEP(7, 3, 1); - - SETUP_STEP(8, 0, 2); - SETUP_STEP(9, 1, 2); - SETUP_STEP(10, 0, 3); - SETUP_STEP(11, 1, 3); - - SETUP_STEP(12, 2, 2); - SETUP_STEP(13, 3, 2); - SETUP_STEP(14, 2, 3); - SETUP_STEP(15, 3, 3); -#undef STEP } @@ -669,28 +610,24 @@ do_triangle_ccw(struct lp_setup_context *setup, * these planes elsewhere. */ if (nr_planes == 7) { - tri->plane[3].step = step_scissor_minx; tri->plane[3].dcdx = -1; tri->plane[3].dcdy = 0; tri->plane[3].c = 1-minx; tri->plane[3].ei = 0; tri->plane[3].eo = 1; - tri->plane[4].step = step_scissor_maxx; tri->plane[4].dcdx = 1; tri->plane[4].dcdy = 0; tri->plane[4].c = maxx; tri->plane[4].ei = -1; tri->plane[4].eo = 0; - tri->plane[5].step = step_scissor_miny; tri->plane[5].dcdx = 0; tri->plane[5].dcdy = 1; tri->plane[5].c = 1-miny; tri->plane[5].ei = 0; tri->plane[5].eo = 1; - tri->plane[6].step = step_scissor_maxy; tri->plane[6].dcdx = 0; tri->plane[6].dcdy = -1; tri->plane[6].c = maxy; -- cgit v1.2.3 From 85d9bc236d6a8ff8f12cbc2150f8c3740354f573 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 15 Aug 2010 17:40:39 +0100 Subject: llvmpipe: consolidate several loops in lp_rast_triangle --- src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 33 ++++++++++++-------------- 1 file changed, 15 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index 98ebcfa870..43f72d8ca8 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -152,31 +152,28 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, struct lp_rast_plane plane[NR_PLANES]; int c[NR_PLANES]; unsigned outmask, inmask, partmask, partial_mask; - unsigned j, nr_planes = 0; + unsigned j = 0; - while (plane_mask) { - int i = ffs(plane_mask) - 1; - plane[nr_planes] = tri->plane[i]; - plane_mask &= ~(1 << i); - nr_planes++; - }; - - assert(nr_planes == NR_PLANES); outmask = 0; /* outside one or more trivial reject planes */ partmask = 0; /* outside one or more trivial accept planes */ - for (j = 0; j < NR_PLANES; j++) { + while (plane_mask) { + int i = ffs(plane_mask) - 1; + plane[j] = tri->plane[i]; + plane_mask &= ~(1 << i); c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; - } - for (j = 0; j < NR_PLANES; j++) { - const int dcdx = -plane[j].dcdx * 16; - const int dcdy = plane[j].dcdy * 16; - const int cox = c[j] + plane[j].eo * 16; - const int cio = c[j] + plane[j].ei * 16 - 1; + { + const int dcdx = -plane[j].dcdx * 16; + const int dcdy = plane[j].dcdy * 16; + const int cox = c[j] + plane[j].eo * 16; + const int cio = c[j] + plane[j].ei * 16 - 1; - outmask |= build_mask_linear(cox, dcdx, dcdy); - partmask |= build_mask_linear(cio, dcdx, dcdy); + outmask |= build_mask_linear(cox, dcdx, dcdy); + partmask |= build_mask_linear(cio, dcdx, dcdy); + } + + j++; } if (outmask == 0xffff) -- cgit v1.2.3