From de553d906b4a205d811a9e1651f14212ec284e29 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 23 Jul 2010 17:32:32 -0400 Subject: r600g: drop compiler stuff and switch over dumb tgsi assembler Writing a compiler is time consuming and error prone. In order to allow r600g to progress further in the meantime, I wrote a simple TGSI assembler. It does stupid things, but I would rather keep the code simple than have people trying to optimize the code it generates. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/Makefile | 7 +- src/gallium/drivers/r600/r600_asm.c | 385 ++++++++++ src/gallium/drivers/r600/r600_asm.h | 112 +++ src/gallium/drivers/r600/r600_compiler.c | 447 ------------ src/gallium/drivers/r600/r600_compiler.h | 320 --------- src/gallium/drivers/r600/r600_compiler_dump.c | 267 ------- src/gallium/drivers/r600/r600_compiler_r600.c | 972 -------------------------- src/gallium/drivers/r600/r600_compiler_r700.c | 233 ------ src/gallium/drivers/r600/r600_compiler_tgsi.c | 730 ------------------- src/gallium/drivers/r600/r600_context.c | 2 +- src/gallium/drivers/r600/r600_context.h | 5 +- src/gallium/drivers/r600/r600_shader.c | 784 ++++++++++++++++++--- src/gallium/drivers/r600/r600_shader.h | 244 +------ src/gallium/drivers/r600/r600_sq.h | 18 +- src/gallium/drivers/r600/r600_state.c | 4 +- src/gallium/drivers/r600/r700_asm.c | 70 ++ 16 files changed, 1277 insertions(+), 3323 deletions(-) create mode 100644 src/gallium/drivers/r600/r600_asm.c create mode 100644 src/gallium/drivers/r600/r600_asm.h delete mode 100644 src/gallium/drivers/r600/r600_compiler.c delete mode 100644 src/gallium/drivers/r600/r600_compiler.h delete mode 100644 src/gallium/drivers/r600/r600_compiler_dump.c delete mode 100644 src/gallium/drivers/r600/r600_compiler_r600.c delete mode 100644 src/gallium/drivers/r600/r600_compiler_r700.c delete mode 100644 src/gallium/drivers/r600/r600_compiler_tgsi.c create mode 100644 src/gallium/drivers/r600/r700_asm.c (limited to 'src/gallium') diff 
--git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile index aae31a6a6e..8f1e1366b5 100644 --- a/src/gallium/drivers/r600/Makefile +++ b/src/gallium/drivers/r600/Makefile @@ -18,10 +18,7 @@ C_SOURCES = \ r600_state.c \ r600_texture.c \ r600_shader.c \ - r600_compiler.c \ - r600_compiler_tgsi.c \ - r600_compiler_dump.c \ - r600_compiler_r600.c \ - r600_compiler_r700.c + r600_asm.c \ + r700_asm.c include ../../Makefile.template diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c new file mode 100644 index 0000000000..6e48703a57 --- /dev/null +++ b/src/gallium/drivers/r600/r600_asm.c @@ -0,0 +1,385 @@ +/* + * Copyright 2010 Jerome Glisse + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "r600_asm.h" +#include "r600_context.h" +#include "util/u_memory.h" +#include "r600_sq.h" +#include <stdio.h> +#include <errno.h> + +int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id); + +static struct r600_bc_cf *r600_bc_cf(void) +{ + struct r600_bc_cf *cf = CALLOC_STRUCT(r600_bc_cf); + + if (cf == NULL) + return NULL; + LIST_INITHEAD(&cf->list); + LIST_INITHEAD(&cf->alu); + LIST_INITHEAD(&cf->vtx); + return cf; +} + +static struct r600_bc_alu *r600_bc_alu(void) +{ + struct r600_bc_alu *alu = CALLOC_STRUCT(r600_bc_alu); + + if (alu == NULL) + return NULL; + LIST_INITHEAD(&alu->list); + return alu; +} + +static struct r600_bc_vtx *r600_bc_vtx(void) +{ + struct r600_bc_vtx *vtx = CALLOC_STRUCT(r600_bc_vtx); + + if (vtx == NULL) + return NULL; + LIST_INITHEAD(&vtx->list); + return vtx; +} + +int r600_bc_init(struct r600_bc *bc, enum radeon_family family) +{ + LIST_INITHEAD(&bc->cf); + bc->family = family; + return 0; +} + +static int r600_bc_add_cf(struct r600_bc *bc) +{ + struct r600_bc_cf *cf = r600_bc_cf(); + + if (cf == NULL) + return -ENOMEM; + LIST_ADDTAIL(&cf->list, &bc->cf); + if (bc->cf_last) + cf->id = bc->cf_last->id + 2; + bc->cf_last = cf; + bc->ncf++; + bc->ndw += 2; + return 0; +} + +int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output) +{ + int r; + + r = r600_bc_add_cf(bc); + if (r) + return r; + bc->cf_last->inst = output->inst; + memcpy(&bc->cf_last->output, output, sizeof(struct r600_bc_output)); + return 0; +} + +int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) +{ + struct r600_bc_alu *nalu = r600_bc_alu(); + struct r600_bc_alu *lalu; + int i, r; + + if (nalu == NULL) + return -ENOMEM; + memcpy(nalu, alu, sizeof(struct r600_bc_alu)); + nalu->nliteral = 0; + + /* cf can contains only alu or only vtx or only tex */ + if (bc->cf_last == NULL || bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) { + r = r600_bc_add_cf(bc); + if (r) { + free(nalu); + return r; + } + 
bc->cf_last->inst = V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3; + } + /* number of gpr == the last gpr used in any alu */ + for (i = 0; i < 3; i++) { + if (alu->src[i].sel >= bc->ngpr && alu->src[i].sel < 128) { + bc->ngpr = alu->src[i].sel + 1; + } + /* compute how many literal are needed + * either 2 or 4 literals + */ + if (alu->src[i].sel == 253) { + if (((alu->src[i].chan + 2) & 0x6) > nalu->nliteral) { + nalu->nliteral = (alu->src[i].chan + 2) & 0x6; + } + } + } + if (!LIST_IS_EMPTY(&bc->cf_last->alu)) { + lalu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list); + if (!lalu->last && lalu->nliteral > nalu->nliteral) { + nalu->nliteral = lalu->nliteral; + } + } + if (alu->dst.sel >= bc->ngpr) { + bc->ngpr = alu->dst.sel + 1; + } + LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu); + /* each alu use 2 dwords */ + bc->cf_last->ndw += 2; + bc->ndw += 2; + return 0; +} + +int r600_bc_add_literal(struct r600_bc *bc, const u32 *value) +{ + struct r600_bc_alu *alu; + + if (bc->cf_last == NULL || + bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) || + LIST_IS_EMPTY(&bc->cf_last->alu)) { + R600_ERR("last CF is not ALU (%p)\n", bc->cf_last); + return -EINVAL; + } + alu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list); + if (!alu->last || !alu->nliteral) { + return 0; + } + memcpy(alu->value, value, 4 * 4); + bc->cf_last->ndw += alu->nliteral; + bc->ndw += alu->nliteral; + return 0; +} + +int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) +{ + struct r600_bc_vtx *nvtx = r600_bc_vtx(); + int r; + + if (nvtx == NULL) + return -ENOMEM; + memcpy(nvtx, vtx, sizeof(struct r600_bc_vtx)); + + /* cf can contains only alu or only vtx or only tex */ + if (bc->cf_last == NULL || + (bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX && + bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC)) { + r = r600_bc_add_cf(bc); + if (r) { + free(nvtx); + return r; + } + bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_VTX; + } + 
LIST_ADDTAIL(&nvtx->list, &bc->cf_last->vtx); + /* each fetch uses 4 dwords */ + bc->cf_last->ndw += 4; + bc->ndw += 4; + return 0; +} + +int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id) +{ + bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | + S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | + S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x) | + S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count); + bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) | + S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) | + S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) | + S_SQ_VTX_WORD1_DST_SEL_W(vtx->dst_sel_w) | + S_SQ_VTX_WORD1_USE_CONST_FIELDS(1) | + S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr); + bc->bytecode[id++] = S_SQ_VTX_WORD2_MEGA_FETCH(1); + bc->bytecode[id++] = 0; + return 0; +} + +int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) +{ + unsigned i; + + /* don't replace gpr by pv or ps for destination register */ + if (alu->is_op3) { + bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | + S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | + S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | + S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | + S_SQ_ALU_WORD0_LAST(alu->last); + bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | + S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | + S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) | + S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) | + S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) | + S_SQ_ALU_WORD1_OP3_ALU_INST(alu->inst) | + S_SQ_ALU_WORD1_BANK_SWIZZLE(0); + } else { + bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | + S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | + S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) | + S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | + S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | + S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) | + S_SQ_ALU_WORD0_LAST(alu->last); + bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | + 
S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | + S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | + S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | + S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | + S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) | + S_SQ_ALU_WORD1_BANK_SWIZZLE(0); + } + if (alu->last) { + for (i = 0; i < alu->nliteral; i++) { + bc->bytecode[id++] = alu->value[i]; + } + } + return 0; +} + +int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) +{ + unsigned id = cf->id; + + switch (cf->inst) { + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1); + bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) | + S_SQ_CF_ALU_WORD1_BARRIER(1) | + S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); + break; + case V_SQ_CF_WORD1_SQ_CF_INST_VTX: + case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: + bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); + bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1); + break; + case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: + case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: + bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) | + S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) | + S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) | + S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type); + bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) | + S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) | + S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) | + S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) | + S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) | + S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst) | + S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program); + break; + default: + R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); + return -EINVAL; + } + return 0; +} + +int 
r600_bc_build(struct r600_bc *bc) +{ + struct r600_bc_cf *cf; + struct r600_bc_alu *alu; + struct r600_bc_vtx *vtx; + unsigned addr; + int r; + + + /* first pass: compute addr of each CF block */ + /* addr start after all the CF instructions */ + addr = bc->cf_last->id + 2; + LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { + switch (cf->inst) { + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + break; + case V_SQ_CF_WORD1_SQ_CF_INST_VTX: + case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: + /* fetch node need to be 16 bytes aligned*/ + addr += 3; + addr &= 0xFFFFFFFCUL; + break; + case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: + case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: + break; + default: + R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); + return -EINVAL; + } + cf->addr = addr; + addr += cf->ndw; + bc->ndw = cf->addr + cf->ndw; + } + free(bc->bytecode); + bc->bytecode = calloc(1, bc->ndw * 4); + if (bc->bytecode == NULL) + return -ENOMEM; + LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { + addr = cf->addr; + r = r600_bc_cf_build(bc, cf); + if (r) + return r; + switch (cf->inst) { + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { + switch (bc->family) { + case CHIP_R600: + case CHIP_RV610: + case CHIP_RV630: + case CHIP_RV670: + case CHIP_RV620: + case CHIP_RV635: + case CHIP_RS780: + case CHIP_RS880: + r = r600_bc_alu_build(bc, alu, addr); + break; + case CHIP_RV770: + case CHIP_RV730: + case CHIP_RV710: + case CHIP_RV740: + r = r700_bc_alu_build(bc, alu, addr); + break; + default: + R600_ERR("unknown family %d\n", bc->family); + return -EINVAL; + } + if (r) + return r; + addr += 2; + if (alu->last) { + addr += alu->nliteral; + } + } + break; + case V_SQ_CF_WORD1_SQ_CF_INST_VTX: + case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: + LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { + r = r600_bc_vtx_build(bc, vtx, addr); + if (r) + return r; + addr += 4; + } + break; + case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: + case 
V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: + break; + default: + R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); + return -EINVAL; + } + } + return 0; +} diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h new file mode 100644 index 0000000000..8a874a9df5 --- /dev/null +++ b/src/gallium/drivers/r600/r600_asm.h @@ -0,0 +1,112 @@ +/* + * Copyright 2010 Jerome Glisse + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef R600_ASM_H +#define R600_ASM_H + +#include "radeon.h" +#include "util/u_double_list.h" + +struct r600_bc_alu_src { + unsigned sel; + unsigned chan; + unsigned neg; + unsigned abs; +}; + +struct r600_bc_alu_dst { + unsigned sel; + unsigned chan; + unsigned clamp; + unsigned write; +}; + +struct r600_bc_alu { + struct list_head list; + struct r600_bc_alu_src src[3]; + struct r600_bc_alu_dst dst; + unsigned inst; + unsigned last; + unsigned is_op3; + unsigned nliteral; + u32 value[4]; +}; + +struct r600_bc_vtx { + struct list_head list; + unsigned inst; + unsigned fetch_type; + unsigned buffer_id; + unsigned src_gpr; + unsigned src_sel_x; + unsigned mega_fetch_count; + unsigned dst_gpr; + unsigned dst_sel_x; + unsigned dst_sel_y; + unsigned dst_sel_z; + unsigned dst_sel_w; +}; + +struct r600_bc_output { + unsigned array_base; + unsigned type; + unsigned end_of_program; + unsigned inst; + unsigned elem_size; + unsigned gpr; + unsigned swizzle_x; + unsigned swizzle_y; + unsigned swizzle_z; + unsigned swizzle_w; + unsigned barrier; +}; + +struct r600_bc_cf { + struct list_head list; + unsigned inst; + unsigned addr; + unsigned ndw; + unsigned id; + struct list_head alu; + struct list_head vtx; + struct r600_bc_output output; +}; + +struct r600_bc { + enum radeon_family family; + struct list_head cf; + struct r600_bc_cf *cf_last; + unsigned ndw; + unsigned ncf; + unsigned ngpr; + unsigned nresource; + u32 *bytecode; +}; + +int r600_bc_init(struct r600_bc *bc, enum radeon_family family); +int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu); +int r600_bc_add_literal(struct r600_bc *bc, const u32 *value); +int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx); +int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output); +int r600_bc_build(struct r600_bc *bc); + +#endif diff --git a/src/gallium/drivers/r600/r600_compiler.c b/src/gallium/drivers/r600/r600_compiler.c deleted file mode 100644 index 
1804b86d24..0000000000 --- a/src/gallium/drivers/r600/r600_compiler.c +++ /dev/null @@ -1,447 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ -#include -#include -#include -#include -#include -#include "r600_compiler.h" - -struct c_vector *c_vector_new(void) -{ - struct c_vector *v = calloc(1, sizeof(struct c_vector)); - - if (v == NULL) { - return NULL; - } - LIST_INITHEAD(&v->head); - return v; -} - -static unsigned c_opcode_is_alu(unsigned opcode) -{ - switch (opcode) { - case C_OPCODE_MOV: - case C_OPCODE_MUL: - case C_OPCODE_MAD: - case C_OPCODE_ARL: - case C_OPCODE_LIT: - case C_OPCODE_RCP: - case C_OPCODE_RSQ: - case C_OPCODE_EXP: - case C_OPCODE_LOG: - case C_OPCODE_ADD: - case C_OPCODE_DP3: - case C_OPCODE_DP4: - case C_OPCODE_DST: - case C_OPCODE_MIN: - case C_OPCODE_MAX: - case C_OPCODE_SLT: - case C_OPCODE_SGE: - case C_OPCODE_SUB: - case C_OPCODE_LRP: - case C_OPCODE_CND: - case C_OPCODE_DP2A: - case C_OPCODE_FRC: - case C_OPCODE_CLAMP: - case C_OPCODE_FLR: - case C_OPCODE_ROUND: - case C_OPCODE_EX2: - case C_OPCODE_LG2: - case C_OPCODE_POW: - case C_OPCODE_XPD: - case C_OPCODE_ABS: - case C_OPCODE_RCC: - case C_OPCODE_DPH: - case C_OPCODE_COS: - case C_OPCODE_DDX: - case C_OPCODE_DDY: - case C_OPCODE_PK2H: - case C_OPCODE_PK2US: - case C_OPCODE_PK4B: - case C_OPCODE_PK4UB: - case C_OPCODE_RFL: - case C_OPCODE_SEQ: - case C_OPCODE_SFL: - case C_OPCODE_SGT: - case C_OPCODE_SIN: - case C_OPCODE_SLE: - case C_OPCODE_SNE: - case C_OPCODE_STR: - case C_OPCODE_UP2H: - case C_OPCODE_UP2US: - case C_OPCODE_UP4B: - case C_OPCODE_UP4UB: - case C_OPCODE_X2D: - case C_OPCODE_ARA: - case C_OPCODE_ARR: - case C_OPCODE_BRA: - case C_OPCODE_SSG: - case C_OPCODE_CMP: - case C_OPCODE_SCS: - case C_OPCODE_NRM: - case C_OPCODE_DIV: - case C_OPCODE_DP2: - case C_OPCODE_CEIL: - case C_OPCODE_I2F: - case C_OPCODE_NOT: - case C_OPCODE_TRUNC: - case C_OPCODE_SHL: - case C_OPCODE_AND: - case C_OPCODE_OR: - case C_OPCODE_MOD: - case C_OPCODE_XOR: - case C_OPCODE_SAD: - case C_OPCODE_NRM4: - case C_OPCODE_F2I: - case C_OPCODE_IDIV: - case C_OPCODE_IMAX: - case C_OPCODE_IMIN: - case C_OPCODE_INEG: - case 
C_OPCODE_ISGE: - case C_OPCODE_ISHR: - case C_OPCODE_ISLT: - case C_OPCODE_F2U: - case C_OPCODE_U2F: - case C_OPCODE_UADD: - case C_OPCODE_UDIV: - case C_OPCODE_UMAD: - case C_OPCODE_UMAX: - case C_OPCODE_UMIN: - case C_OPCODE_UMOD: - case C_OPCODE_UMUL: - case C_OPCODE_USEQ: - case C_OPCODE_USGE: - case C_OPCODE_USHR: - case C_OPCODE_USLT: - case C_OPCODE_USNE: - return 1; - case C_OPCODE_END: - case C_OPCODE_VFETCH: - case C_OPCODE_KILP: - case C_OPCODE_CAL: - case C_OPCODE_RET: - case C_OPCODE_TXB: - case C_OPCODE_TXL: - case C_OPCODE_BRK: - case C_OPCODE_IF: - case C_OPCODE_BGNFOR: - case C_OPCODE_REP: - case C_OPCODE_ELSE: - case C_OPCODE_ENDIF: - case C_OPCODE_ENDFOR: - case C_OPCODE_ENDREP: - case C_OPCODE_PUSHA: - case C_OPCODE_POPA: - case C_OPCODE_TXF: - case C_OPCODE_TXQ: - case C_OPCODE_CONT: - case C_OPCODE_EMIT: - case C_OPCODE_ENDPRIM: - case C_OPCODE_BGNLOOP: - case C_OPCODE_BGNSUB: - case C_OPCODE_ENDLOOP: - case C_OPCODE_ENDSUB: - case C_OPCODE_NOP: - case C_OPCODE_CALLNZ: - case C_OPCODE_IFC: - case C_OPCODE_BREAKC: - case C_OPCODE_KIL: - case C_OPCODE_TEX: - case C_OPCODE_TXD: - case C_OPCODE_TXP: - case C_OPCODE_SWITCH: - case C_OPCODE_CASE: - case C_OPCODE_DEFAULT: - case C_OPCODE_ENDSWITCH: - default: - return 0; - } -} - - -/* NEW */ -void c_node_init(struct c_node *node) -{ - memset(node, 0, sizeof(struct c_node)); - LIST_INITHEAD(&node->predecessors); - LIST_INITHEAD(&node->successors); - LIST_INITHEAD(&node->childs); - LIST_INITHEAD(&node->insts); - node->parent = NULL; -} - -static struct c_node_link *c_node_link_new(struct c_node *node) -{ - struct c_node_link *link; - - link = calloc(1, sizeof(struct c_node_link)); - if (link == NULL) - return NULL; - LIST_INITHEAD(&link->head); - link->node = node; - return link; -} - -int c_node_cfg_link(struct c_node *predecessor, struct c_node *successor) -{ - struct c_node_link *pedge, *sedge; - - pedge = c_node_link_new(successor); - sedge = c_node_link_new(predecessor); - if (sedge == NULL || 
pedge == NULL) { - free(sedge); - free(pedge); - return -ENOMEM; - } - LIST_ADDTAIL(&pedge->head, &predecessor->successors); - LIST_ADDTAIL(&sedge->head, &successor->predecessors); - - return 0; -} - -int c_node_add_new_instruction_head(struct c_node *node, struct c_instruction *instruction) -{ - struct c_instruction *inst = malloc(sizeof(struct c_instruction)); - - if (inst == NULL) - return -ENOMEM; - memcpy(inst, instruction, sizeof(struct c_instruction)); - LIST_ADD(&inst->head, &node->insts); - return 0; -} - -int c_node_add_new_instruction(struct c_node *node, struct c_instruction *instruction) -{ - struct c_instruction *inst = malloc(sizeof(struct c_instruction)); - - if (inst == NULL) - return -ENOMEM; - memcpy(inst, instruction, sizeof(struct c_instruction)); - LIST_ADDTAIL(&inst->head, &node->insts); - return 0; -} - -struct c_node *c_shader_cfg_new_node_after(struct c_shader *shader, struct c_node *predecessor) -{ - struct c_node *node = calloc(1, sizeof(struct c_node)); - - if (node == NULL) - return NULL; - c_node_init(node); - if (c_node_cfg_link(predecessor, node)) { - free(node); - return NULL; - } - LIST_ADDTAIL(&node->head, &shader->nodes); - return node; -} - -int c_shader_init(struct c_shader *shader, unsigned type) -{ - unsigned i; - int r; - - shader->type = type; - for (i = 0; i < C_FILE_COUNT; i++) { - shader->files[i].nvectors = 0; - LIST_INITHEAD(&shader->files[i].vectors); - } - LIST_INITHEAD(&shader->nodes); - c_node_init(&shader->entry); - c_node_init(&shader->end); - shader->entry.opcode = C_OPCODE_ENTRY; - shader->end.opcode = C_OPCODE_END; - r = c_node_cfg_link(&shader->entry, &shader->end); - if (r) - return r; - return 0; -} - -struct c_vector *c_shader_vector_new(struct c_shader *shader, unsigned file, unsigned name, int sid) -{ - struct c_vector *v = calloc(1, sizeof(struct c_vector)); - int i; - - if (v == NULL) { - return NULL; - } - for (i = 0; i < 4; i++) { - v->channel[i] = calloc(1, sizeof(struct c_channel)); - if 
(v->channel[i] == NULL) - goto out_err; - v->channel[i]->vindex = i; - v->channel[i]->vector = v; - } - v->file = file; - v->name = name; - v->sid = sid; - shader->files[v->file].nvectors++; - v->id = shader->nvectors++; - LIST_ADDTAIL(&v->head, &shader->files[v->file].vectors); - return v; -out_err: - for (i = 0; i < 4; i++) { - free(v->channel[i]); - } - free(v); - return NULL; -} - -static void c_node_remove_link(struct list_head *head, struct c_node *node) -{ - struct c_node_link *link, *tmp; - - LIST_FOR_EACH_ENTRY_SAFE(link, tmp, head, head) { - if (link->node == node) { - LIST_DEL(&link->head); - free(link); - } - } -} - -static void c_node_destroy(struct c_node *node) -{ - struct c_instruction *i, *ni; - struct c_node_link *link, *tmp; - - LIST_FOR_EACH_ENTRY_SAFE(i, ni, &node->insts, head) { - LIST_DEL(&i->head); - free(i); - } - if (node->parent) - c_node_remove_link(&node->parent->childs, node); - node->parent = NULL; - LIST_FOR_EACH_ENTRY_SAFE(link, tmp, &node->predecessors, head) { - c_node_remove_link(&link->node->successors, node); - LIST_DEL(&link->head); - free(link); - } - LIST_FOR_EACH_ENTRY_SAFE(link, tmp, &node->successors, head) { - c_node_remove_link(&link->node->predecessors, node); - LIST_DEL(&link->head); - free(link); - } - LIST_FOR_EACH_ENTRY_SAFE(link, tmp, &node->childs, head) { - link->node->parent = NULL; - LIST_DEL(&link->head); - free(link); - } -} - -void c_shader_destroy(struct c_shader *shader) -{ - struct c_node *n, *nn; - struct c_vector *v, *nv; - unsigned i; - - for (i = 0; i < C_FILE_COUNT; i++) { - shader->files[i].nvectors = 0; - LIST_FOR_EACH_ENTRY_SAFE(v, nv, &shader->files[i].vectors, head) { - LIST_DEL(&v->head); - free(v->channel[0]); - free(v->channel[1]); - free(v->channel[2]); - free(v->channel[3]); - free(v); - } - } - LIST_FOR_EACH_ENTRY_SAFE(n, nn, &shader->nodes, head) { - LIST_DEL(&n->head); - c_node_destroy(n); - } - memset(shader, 0, sizeof(struct c_shader)); -} - -static void 
c_shader_dfs_without_rec(struct c_node *entry, struct c_node *node) -{ - struct c_node_link *link; - - if (entry == node || entry->visited) - return; - entry->visited = 1; - LIST_FOR_EACH_ENTRY(link, &entry->successors, head) { - c_shader_dfs_without_rec(link->node, node); - } -} - -static void c_shader_dfs_without(struct c_shader *shader, struct c_node *node) -{ - struct c_node *n; - - shader->entry.visited = 0; - shader->end.visited = 0; - LIST_FOR_EACH_ENTRY(n, &shader->nodes, head) { - n->visited = 0; - } - c_shader_dfs_without_rec(&shader->entry, node); -} - -static int c_shader_build_dominator_tree_rec(struct c_shader *shader, struct c_node *node) -{ - struct c_node_link *link, *nlink; - unsigned found = 0; - int r; - - if (node->done) - return 0; - node->done = 1; - LIST_FOR_EACH_ENTRY(link, &node->predecessors, head) { - /* if we remove this predecessor can we reach the current node ? */ - c_shader_dfs_without(shader, link->node); - if (node->visited == 0) { - /* we were unable to visit current node thus current - * predecessor is the immediate dominator of node, as - * their can be only one immediate dominator we break - */ - node->parent = link->node; - nlink = c_node_link_new(node); - if (nlink == NULL) - return -ENOMEM; - LIST_ADDTAIL(&nlink->head, &link->node->childs); - found = 1; - break; - } - } - /* this shouldn't happen there should at least be 1 denominator for each node */ - if (!found && node->opcode != C_OPCODE_ENTRY) { - fprintf(stderr, "invalid flow control graph node %p (%d) has no immediate dominator\n", - node, node->opcode); - return -EINVAL; - } - LIST_FOR_EACH_ENTRY(link, &node->predecessors, head) { - r = c_shader_build_dominator_tree_rec(shader, link->node); - if (r) - return r; - } - return 0; -} - -int c_shader_build_dominator_tree(struct c_shader *shader) -{ - struct c_node *node; - LIST_FOR_EACH_ENTRY(node, &shader->nodes, head) { - node->done = 0; - } - return c_shader_build_dominator_tree_rec(shader, &shader->end); -} diff 
--git a/src/gallium/drivers/r600/r600_compiler.h b/src/gallium/drivers/r600/r600_compiler.h deleted file mode 100644 index 77230aed73..0000000000 --- a/src/gallium/drivers/r600/r600_compiler.h +++ /dev/null @@ -1,320 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ -#ifndef R600_COMPILER_H -#define R600_COMPILER_H - -#include "util/u_double_list.h" - -struct c_vector; - -/* operand are the basic source/destination of each operation */ -struct c_channel { - struct list_head head; - unsigned vindex; /**< index in vector X,Y,Z,W (0,1,2,3) */ - unsigned value; /**< immediate value 32bits */ - struct c_vector *vector; /**< vector to which it belongs */ -}; - -/* in GPU world most of the time operand are grouped into vector - * of 4 component this structure is mostly and handler to group - * operand into a same vector - */ -struct c_vector { - struct list_head head; - unsigned id; /**< vector uniq id */ - unsigned name; /**< semantic name */ - unsigned file; /**< operand file C_FILE_* */ - int sid; /**< semantic id */ - struct c_channel *channel[4]; /**< operands */ -}; - -#define C_PROGRAM_TYPE_VS 0 -#define C_PROGRAM_TYPE_FS 1 -#define C_PROGRAM_TYPE_COUNT 2 - -#define C_NODE_FLAG_ALU 1 -#define C_NODE_FLAG_FETCH 2 - -#define C_SWIZZLE_X 0 -#define C_SWIZZLE_Y 1 -#define C_SWIZZLE_Z 2 -#define C_SWIZZLE_W 3 -#define C_SWIZZLE_0 4 -#define C_SWIZZLE_1 5 -#define C_SWIZZLE_D 6 - -#define C_FILE_NULL 0 -#define C_FILE_CONSTANT 1 -#define C_FILE_INPUT 2 -#define C_FILE_OUTPUT 3 -#define C_FILE_TEMPORARY 4 -#define C_FILE_SAMPLER 5 -#define C_FILE_ADDRESS 6 -#define C_FILE_IMMEDIATE 7 -#define C_FILE_LOOP 8 -#define C_FILE_PREDICATE 9 -#define C_FILE_SYSTEM_VALUE 10 -#define C_FILE_RESOURCE 11 -#define C_FILE_COUNT 12 - -#define C_SEMANTIC_POSITION 0 -#define C_SEMANTIC_COLOR 1 -#define C_SEMANTIC_BCOLOR 2 /**< back-face color */ -#define C_SEMANTIC_FOG 3 -#define C_SEMANTIC_PSIZE 4 -#define C_SEMANTIC_GENERIC 5 -#define C_SEMANTIC_NORMAL 6 -#define C_SEMANTIC_FACE 7 -#define C_SEMANTIC_EDGEFLAG 8 -#define C_SEMANTIC_PRIMID 9 -#define C_SEMANTIC_INSTANCEID 10 -#define C_SEMANTIC_VERTEXID 11 -#define C_SEMANTIC_COUNT 12 /**< number of semantic values */ - -#define C_OPCODE_NOP 0 -#define C_OPCODE_MOV 1 -#define C_OPCODE_LIT 2 
-#define C_OPCODE_RCP 3 -#define C_OPCODE_RSQ 4 -#define C_OPCODE_EXP 5 -#define C_OPCODE_LOG 6 -#define C_OPCODE_MUL 7 -#define C_OPCODE_ADD 8 -#define C_OPCODE_DP3 9 -#define C_OPCODE_DP4 10 -#define C_OPCODE_DST 11 -#define C_OPCODE_MIN 12 -#define C_OPCODE_MAX 13 -#define C_OPCODE_SLT 14 -#define C_OPCODE_SGE 15 -#define C_OPCODE_MAD 16 -#define C_OPCODE_SUB 17 -#define C_OPCODE_LRP 18 -#define C_OPCODE_CND 19 -/* gap */ -#define C_OPCODE_DP2A 21 -/* gap */ -#define C_OPCODE_FRC 24 -#define C_OPCODE_CLAMP 25 -#define C_OPCODE_FLR 26 -#define C_OPCODE_ROUND 27 -#define C_OPCODE_EX2 28 -#define C_OPCODE_LG2 29 -#define C_OPCODE_POW 30 -#define C_OPCODE_XPD 31 -/* gap */ -#define C_OPCODE_ABS 33 -#define C_OPCODE_RCC 34 -#define C_OPCODE_DPH 35 -#define C_OPCODE_COS 36 -#define C_OPCODE_DDX 37 -#define C_OPCODE_DDY 38 -#define C_OPCODE_KILP 39 /* predicated kill */ -#define C_OPCODE_PK2H 40 -#define C_OPCODE_PK2US 41 -#define C_OPCODE_PK4B 42 -#define C_OPCODE_PK4UB 43 -#define C_OPCODE_RFL 44 -#define C_OPCODE_SEQ 45 -#define C_OPCODE_SFL 46 -#define C_OPCODE_SGT 47 -#define C_OPCODE_SIN 48 -#define C_OPCODE_SLE 49 -#define C_OPCODE_SNE 50 -#define C_OPCODE_STR 51 -#define C_OPCODE_TEX 52 -#define C_OPCODE_TXD 53 -#define C_OPCODE_TXP 54 -#define C_OPCODE_UP2H 55 -#define C_OPCODE_UP2US 56 -#define C_OPCODE_UP4B 57 -#define C_OPCODE_UP4UB 58 -#define C_OPCODE_X2D 59 -#define C_OPCODE_ARA 60 -#define C_OPCODE_ARR 61 -#define C_OPCODE_BRA 62 -#define C_OPCODE_CAL 63 -#define C_OPCODE_RET 64 -#define C_OPCODE_SSG 65 /* SGN */ -#define C_OPCODE_CMP 66 -#define C_OPCODE_SCS 67 -#define C_OPCODE_TXB 68 -#define C_OPCODE_NRM 69 -#define C_OPCODE_DIV 70 -#define C_OPCODE_DP2 71 -#define C_OPCODE_TXL 72 -#define C_OPCODE_BRK 73 -#define C_OPCODE_IF 74 -#define C_OPCODE_BGNFOR 75 -#define C_OPCODE_REP 76 -#define C_OPCODE_ELSE 77 -#define C_OPCODE_ENDIF 78 -#define C_OPCODE_ENDFOR 79 -#define C_OPCODE_ENDREP 80 -#define C_OPCODE_PUSHA 81 -#define C_OPCODE_POPA 82 -#define 
C_OPCODE_CEIL 83 -#define C_OPCODE_I2F 84 -#define C_OPCODE_NOT 85 -#define C_OPCODE_TRUNC 86 -#define C_OPCODE_SHL 87 -/* gap */ -#define C_OPCODE_AND 89 -#define C_OPCODE_OR 90 -#define C_OPCODE_MOD 91 -#define C_OPCODE_XOR 92 -#define C_OPCODE_SAD 93 -#define C_OPCODE_TXF 94 -#define C_OPCODE_TXQ 95 -#define C_OPCODE_CONT 96 -#define C_OPCODE_EMIT 97 -#define C_OPCODE_ENDPRIM 98 -#define C_OPCODE_BGNLOOP 99 -#define C_OPCODE_BGNSUB 100 -#define C_OPCODE_ENDLOOP 101 -#define C_OPCODE_ENDSUB 102 -/* gap */ -#define C_OPCODE_NRM4 112 -#define C_OPCODE_CALLNZ 113 -#define C_OPCODE_IFC 114 -#define C_OPCODE_BREAKC 115 -#define C_OPCODE_KIL 116 /* conditional kill */ -#define C_OPCODE_END 117 /* aka HALT */ -/* gap */ -#define C_OPCODE_F2I 119 -#define C_OPCODE_IDIV 120 -#define C_OPCODE_IMAX 121 -#define C_OPCODE_IMIN 122 -#define C_OPCODE_INEG 123 -#define C_OPCODE_ISGE 124 -#define C_OPCODE_ISHR 125 -#define C_OPCODE_ISLT 126 -#define C_OPCODE_F2U 127 -#define C_OPCODE_U2F 128 -#define C_OPCODE_UADD 129 -#define C_OPCODE_UDIV 130 -#define C_OPCODE_UMAD 131 -#define C_OPCODE_UMAX 132 -#define C_OPCODE_UMIN 133 -#define C_OPCODE_UMOD 134 -#define C_OPCODE_UMUL 135 -#define C_OPCODE_USEQ 136 -#define C_OPCODE_USGE 137 -#define C_OPCODE_USHR 138 -#define C_OPCODE_USLT 139 -#define C_OPCODE_USNE 140 -#define C_OPCODE_SWITCH 141 -#define C_OPCODE_CASE 142 -#define C_OPCODE_DEFAULT 143 -#define C_OPCODE_ENDSWITCH 144 -#define C_OPCODE_VFETCH 145 -#define C_OPCODE_ENTRY 146 -#define C_OPCODE_ARL 147 -#define C_OPCODE_LAST 148 - -#define C_OPERAND_FLAG_ABS (1 << 0) -#define C_OPERAND_FLAG_NEG (1 << 1) - -struct c_operand { - struct c_vector *vector; - unsigned swizzle; - unsigned flag; -}; - -struct c_op { - unsigned ninput; - struct c_operand input[3]; - struct c_operand output; - unsigned opcode; -}; - -struct c_instruction { - struct list_head head; - unsigned nop; - struct c_op op[5]; -}; - -struct c_node; - -struct c_node_link { - struct list_head head; - struct c_node 
*node; -}; - -/** - * struct c_node - * - * @next: all node are in a double linked list, this point to - * next node - * @next: all node are in a double linked list, this point to - * previous node - * @predecessors: list of all predecessor nodes in the flow graph - * @successors: list of all sucessor nodes in the flow graph - * @parent: parent node in the depth first walk tree - * @childs: child nodes in the depth first walk tree - */ -struct c_node { - struct list_head head; - struct list_head predecessors; - struct list_head successors; - struct list_head childs; - struct c_node *parent; - struct list_head insts; - unsigned opcode; - unsigned visited; - unsigned done; - void *backend; -}; - -struct c_file { - unsigned nvectors; - struct list_head vectors; -}; - -struct c_shader { - unsigned nvectors; - struct c_file files[C_FILE_COUNT]; - struct list_head nodes; - struct c_node entry; - struct c_node end; - unsigned type; -}; - -int c_shader_init(struct c_shader *shader, unsigned type); -void c_shader_destroy(struct c_shader *shader); -struct c_vector *c_shader_vector_new(struct c_shader *shader, unsigned file, unsigned name, int sid); -int c_shader_build_dominator_tree(struct c_shader *shader); -void c_shader_dump(struct c_shader *shader); - -void c_node_init(struct c_node *node); -int c_node_add_new_instruction(struct c_node *node, struct c_instruction *instruction); -int c_node_add_new_instruction_head(struct c_node *node, struct c_instruction *instruction); - -/* control flow graph functions */ -int c_node_cfg_link(struct c_node *predecessor, struct c_node *successor); -struct c_node *c_node_cfg_new_after(struct c_node *predecessor); -struct c_node *c_shader_cfg_new_node_after(struct c_shader *shader, struct c_node *predecessor); - -struct c_vector *c_vector_new(void); - -#endif diff --git a/src/gallium/drivers/r600/r600_compiler_dump.c b/src/gallium/drivers/r600/r600_compiler_dump.c deleted file mode 100644 index bb022b7c29..0000000000 --- 
a/src/gallium/drivers/r600/r600_compiler_dump.c +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ -#include -#include -#include -#include -#include "r600_compiler.h" - -static const char *c_file_swz[] = { - "x", - "y", - "z", - "w", - "0", - "1", - ".", -}; - -static const char *c_file_str[] = { - "NULL", - "CONSTANT", - "INPUT", - "OUTPUT", - "TEMPORARY", - "SAMPLER", - "ADDRESS", - "IMMEDIATE", - "LOOP", - "PREDICATE", - "SYSTEM_VALUE", -}; - -static const char *c_semantic_str[] = { - "POSITION", - "COLOR", - "BCOLOR", - "FOG", - "PSIZE", - "GENERIC", - "NORMAL", - "FACE", - "EDGEFLAG", - "PRIMID", - "INSTANCEID", -}; - -static const char *c_opcode_str[] = { - "ARL", - "MOV", - "LIT", - "RCP", - "RSQ", - "EXP", - "LOG", - "MUL", - "ADD", - "DP3", - "DP4", - "DST", - "MIN", - "MAX", - "SLT", - "SGE", - "MAD", - "SUB", - "LRP", - "CND", - "(INVALID)", - "DP2A", - "(INVALID)", - "(INVALID)", - "FRC", - "CLAMP", - "FLR", - "ROUND", - "EX2", - "LG2", - "POW", - "XPD", - "(INVALID)", - "ABS", - "RCC", - "DPH", - "COS", - "DDX", - "DDY", - "KILP", - "PK2H", - "PK2US", - "PK4B", - "PK4UB", - "RFL", - "SEQ", - "SFL", - "SGT", - "SIN", - "SLE", - "SNE", - "STR", - "TEX", - "TXD", - "TXP", - "UP2H", - "UP2US", - "UP4B", - "UP4UB", - "X2D", - "ARA", - "ARR", - "BRA", - "CAL", - "RET", - "SSG", - "CMP", - "SCS", - "TXB", - "NRM", - "DIV", - "DP2", - "TXL", - "BRK", - "IF", - "BGNFOR", - "REP", - "ELSE", - "ENDIF", - "ENDFOR", - "ENDREP", - "PUSHA", - "POPA", - "CEIL", - "I2F", - "NOT", - "TRUNC", - "SHL", - "(INVALID)", - "AND", - "OR", - "MOD", - "XOR", - "SAD", - "TXF", - "TXQ", - "CONT", - "EMIT", - "ENDPRIM", - "BGNLOOP", - "BGNSUB", - "ENDLOOP", - "ENDSUB", - "(INVALID)", - "(INVALID)", - "(INVALID)", - "(INVALID)", - "NOP", - "(INVALID)", - "(INVALID)", - "(INVALID)", - "(INVALID)", - "NRM4", - "CALLNZ", - "IFC", - "BREAKC", - "KIL", - "END", - "(INVALID)", - "F2I", - "IDIV", - "IMAX", - "IMIN", - "INEG", - "ISGE", - "ISHR", - "ISLT", - "F2U", - "U2F", - "UADD", - "UDIV", - "UMAD", - "UMAX", - "UMIN", - "UMOD", - "UMUL", - "USEQ", - "USGE", - "USHR", - "USLT", 
- "USNE", - "SWITCH", - "CASE", - "DEFAULT", - "ENDSWITCH", - "VFETCH", - "ENTRY", -}; - -static inline const char *c_get_name(const char *name[], unsigned i) -{ - return name[i]; -} - -static void pindent(unsigned indent) -{ - unsigned i; - for (i = 0; i < indent; i++) - fprintf(stderr, " "); -} - -static void c_node_dump(struct c_node *node, unsigned indent) -{ - struct c_instruction *i; - unsigned j, k; - - pindent(indent); fprintf(stderr, "# node %s\n", c_get_name(c_opcode_str, node->opcode)); - LIST_FOR_EACH_ENTRY(i, &node->insts, head) { - for (k = 0; k < i->nop; k++) { - pindent(indent); - fprintf(stderr, "%s", c_get_name(c_opcode_str, i->op[k].opcode)); - fprintf(stderr, " %s[%d][%s]", - c_get_name(c_file_str, i->op[k].output.vector->file), - i->op[k].output.vector->id, - c_get_name(c_file_swz, i->op[k].output.swizzle)); - for (j = 0; j < i->op[k].ninput; j++) { - fprintf(stderr, " %s[%d][%s]", - c_get_name(c_file_str, i->op[k].input[j].vector->file), - i->op[k].input[j].vector->id, - c_get_name(c_file_swz, i->op[k].input[j].swizzle)); - } - fprintf(stderr, ";\n"); - } - } -} - -static void c_shader_dump_rec(struct c_shader *shader, struct c_node *node, unsigned indent) -{ - struct c_node_link *link; - - c_node_dump(node, indent); - LIST_FOR_EACH_ENTRY(link, &node->childs, head) { - c_shader_dump_rec(shader, link->node, indent + 1); - } -} - -void c_shader_dump(struct c_shader *shader) -{ - c_shader_dump_rec(shader, &shader->entry, 0); -} diff --git a/src/gallium/drivers/r600/r600_compiler_r600.c b/src/gallium/drivers/r600/r600_compiler_r600.c deleted file mode 100644 index 27ad8f1a18..0000000000 --- a/src/gallium/drivers/r600/r600_compiler_r600.c +++ /dev/null @@ -1,972 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on 
the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#include -#include -#include -#include -#include -#include -#include "r600_screen.h" -#include "r600_context.h" -#include "r600_sq.h" - - -struct r600_alu_instruction { - unsigned copcode; - enum r600_instruction instruction; -}; - -static int r600_shader_alu_translate(struct r600_shader *rshader, - struct r600_shader_node *node, - struct c_instruction *instruction); -struct r600_alu_instruction r600_alu_instruction[C_OPCODE_LAST]; -struct r600_instruction_info r600_instruction_info[]; - -int r600_shader_insert_fetch(struct c_shader *shader) -{ - struct c_vector *vi, *vr, *v, *nv; - struct c_instruction instruction; - int r; - - if (shader->type != C_PROGRAM_TYPE_VS) - return 0; - vi = c_shader_vector_new(shader, C_FILE_INPUT, C_SEMANTIC_VERTEXID, -1); - if (vi == NULL) - return -ENOMEM; - LIST_FOR_EACH_ENTRY_SAFE(v, nv, &shader->files[C_FILE_INPUT].vectors, head) { - if (v == vi) - continue; - vr = c_shader_vector_new(shader, C_FILE_RESOURCE, C_SEMANTIC_GENERIC, -1); - if (vr == NULL) - return -ENOMEM; - memset(&instruction, 0, sizeof(struct c_instruction)); - instruction.nop = 4; - 
instruction.op[0].opcode = C_OPCODE_VFETCH; - instruction.op[1].opcode = C_OPCODE_VFETCH; - instruction.op[2].opcode = C_OPCODE_VFETCH; - instruction.op[3].opcode = C_OPCODE_VFETCH; - instruction.op[0].ninput = 2; - instruction.op[1].ninput = 2; - instruction.op[2].ninput = 2; - instruction.op[3].ninput = 2; - instruction.op[0].output.vector = v; - instruction.op[1].output.vector = v; - instruction.op[2].output.vector = v; - instruction.op[3].output.vector = v; - instruction.op[0].input[0].vector = vi; - instruction.op[0].input[1].vector = vr; - instruction.op[1].input[0].vector = vi; - instruction.op[1].input[1].vector = vr; - instruction.op[2].input[0].vector = vi; - instruction.op[2].input[1].vector = vr; - instruction.op[3].input[0].vector = vi; - instruction.op[3].input[1].vector = vr; - instruction.op[0].output.swizzle = C_SWIZZLE_X; - instruction.op[1].output.swizzle = C_SWIZZLE_Y; - instruction.op[2].output.swizzle = C_SWIZZLE_Z; - instruction.op[3].output.swizzle = C_SWIZZLE_W; - r = c_node_add_new_instruction_head(&shader->entry, &instruction); - if (r) - return r; - LIST_DEL(&v->head); - shader->files[C_FILE_INPUT].nvectors--; - LIST_ADDTAIL(&v->head, &shader->files[C_FILE_TEMPORARY].vectors); - shader->files[C_FILE_TEMPORARY].nvectors++; - v->file = C_FILE_TEMPORARY; - } - return 0; -} - -void r600_shader_cleanup(struct r600_shader *rshader) -{ - struct r600_shader_node *n, *nn; - struct r600_shader_vfetch *vf, *nvf; - struct r600_shader_alu *alu, *nalu; - int i; - - if (rshader == NULL) - return; - if (rshader->gpr) { - for (i = 0; i < rshader->nvector; i++) { - free(rshader->gpr[i]); - } - free(rshader->gpr); - rshader->gpr = NULL; - } - LIST_FOR_EACH_ENTRY_SAFE(n, nn, &rshader->nodes, head) { - LIST_DEL(&n->head); - LIST_FOR_EACH_ENTRY_SAFE(vf, nvf, &n->vfetch, head) { - LIST_DEL(&vf->head); - free(vf); - } - LIST_FOR_EACH_ENTRY_SAFE(alu, nalu, &n->alu, head) { - LIST_DEL(&alu->head); - free(alu); - } - free(n); - } - free(rshader->bcode); - return; 
-} - -int r600_shader_vfetch_bytecode(struct r600_shader *rshader, - struct r600_shader_node *rnode, - struct r600_shader_vfetch *vfetch, - unsigned *cid) -{ - unsigned id = *cid; - - vfetch->cf_addr = id; - rshader->bcode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vfetch->src[1].sel) | - S_SQ_VTX_WORD0_SRC_GPR(vfetch->src[0].sel) | - S_SQ_VTX_WORD0_SRC_SEL_X(vfetch->src[0].sel) | - S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(0x1F); - rshader->bcode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vfetch->dst[0].chan) | - S_SQ_VTX_WORD1_DST_SEL_Y(vfetch->dst[1].chan) | - S_SQ_VTX_WORD1_DST_SEL_Z(vfetch->dst[2].chan) | - S_SQ_VTX_WORD1_DST_SEL_W(vfetch->dst[3].chan) | - S_SQ_VTX_WORD1_USE_CONST_FIELDS(1) | - S_SQ_VTX_WORD1_GPR_DST_GPR(vfetch->dst[0].sel); - rshader->bcode[id++] = S_SQ_VTX_WORD2_MEGA_FETCH(1); - rshader->bcode[id++] = 0; - *cid = id; - return 0; -} - -int r600_shader_update(struct r600_shader *rshader, enum pipe_format *resource_format) -{ - struct r600_shader_node *rnode; - struct r600_shader_vfetch *vfetch; - unsigned i; - - memcpy(rshader->resource_format, resource_format, - rshader->nresource * sizeof(enum pipe_format)); - LIST_FOR_EACH_ENTRY(rnode, &rshader->nodes, head) { - LIST_FOR_EACH_ENTRY(vfetch, &rnode->vfetch, head) { - const struct util_format_description *desc; - i = vfetch->cf_addr + 1; - rshader->bcode[i] &= C_SQ_VTX_WORD1_DST_SEL_X; - rshader->bcode[i] &= C_SQ_VTX_WORD1_DST_SEL_Y; - rshader->bcode[i] &= C_SQ_VTX_WORD1_DST_SEL_Z; - rshader->bcode[i] &= C_SQ_VTX_WORD1_DST_SEL_W; - desc = util_format_description(resource_format[vfetch->src[1].sel]); - if (desc == NULL) { - fprintf(stderr, "%s unknown format %d\n", __func__, resource_format[vfetch->src[1].sel]); - continue; - } - /* WARNING so far TGSI swizzle match R600 ones */ - rshader->bcode[i] |= S_SQ_VTX_WORD1_DST_SEL_X(desc->swizzle[0]); - rshader->bcode[i] |= S_SQ_VTX_WORD1_DST_SEL_Y(desc->swizzle[1]); - rshader->bcode[i] |= S_SQ_VTX_WORD1_DST_SEL_Z(desc->swizzle[2]); - rshader->bcode[i] |= 
S_SQ_VTX_WORD1_DST_SEL_W(desc->swizzle[3]); - } - } - return 0; -} - -int r600_shader_register(struct r600_shader *rshader) -{ - struct c_vector *v, *nv; - unsigned tid, cid, rid, i; - - rshader->nvector = rshader->cshader.nvectors; - rshader->gpr = calloc(rshader->nvector, sizeof(void*)); - if (rshader->gpr == NULL) - return -ENOMEM; - tid = 0; - cid = 0; - rid = 0; - /* alloc input first */ - LIST_FOR_EACH_ENTRY(v, &rshader->cshader.files[C_FILE_INPUT].vectors, head) { - nv = c_vector_new(); - if (nv == NULL) { - return -ENOMEM; - } - memcpy(nv, v, sizeof(struct c_vector)); - nv->id = tid++; - rshader->gpr[v->id] = nv; - } - for (i = 0; i < C_FILE_COUNT; i++) { - if (i == C_FILE_INPUT || i == C_FILE_IMMEDIATE) - continue; - LIST_FOR_EACH_ENTRY(v, &rshader->cshader.files[i].vectors, head) { - switch (v->file) { - case C_FILE_OUTPUT: - case C_FILE_TEMPORARY: - nv = c_vector_new(); - if (nv == NULL) { - return -ENOMEM; - } - memcpy(nv, v, sizeof(struct c_vector)); - nv->id = tid++; - rshader->gpr[v->id] = nv; - break; - case C_FILE_CONSTANT: - nv = c_vector_new(); - if (nv == NULL) { - return -ENOMEM; - } - memcpy(nv, v, sizeof(struct c_vector)); - nv->id = (cid++) + 256; - rshader->gpr[v->id] = nv; - break; - case C_FILE_RESOURCE: - nv = c_vector_new(); - if (nv == NULL) { - return -ENOMEM; - } - memcpy(nv, v, sizeof(struct c_vector)); - nv->id = (rid++); - rshader->gpr[v->id] = nv; - break; - default: - fprintf(stderr, "%s:%d unsupported file %d\n", __func__, __LINE__, v->file); - return -EINVAL; - } - } - } - rshader->ngpr = tid; - rshader->nconstant = cid; - rshader->nresource = rid; - return 0; -} - -int r600_shader_find_gpr(struct r600_shader *rshader, struct c_vector *v, unsigned swizzle, - struct r600_shader_operand *operand) -{ - struct c_vector *tmp; - - /* Values [0,127] correspond to GPR[0..127]. - * Values [256,511] correspond to cfile constants c[0..255]. - * Other special values are shown in the list below. - * 248 SQ_ALU_SRC_0: special constant 0.0. 
- * 249 SQ_ALU_SRC_1: special constant 1.0 float. - * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. - * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. - * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. - * 253 SQ_ALU_SRC_LITERAL: literal constant. - * 254 SQ_ALU_SRC_PV: previous vector result. - * 255 SQ_ALU_SRC_PS: previous scalar result. - */ - operand->vector = v; - operand->sel = 248; - operand->chan = 0; - operand->neg = 0; - operand->abs = 0; - if (v == NULL) - return 0; - if (v->file == C_FILE_IMMEDIATE) { - operand->sel = 253; - } else { - tmp = rshader->gpr[v->id]; - if (tmp == NULL) { - fprintf(stderr, "%s %d unknown register\n", __FILE__, __LINE__); - return -EINVAL; - } - operand->sel = tmp->id; - } - operand->chan = swizzle; - switch (swizzle) { - case C_SWIZZLE_X: - case C_SWIZZLE_Y: - case C_SWIZZLE_Z: - case C_SWIZZLE_W: - break; - case C_SWIZZLE_0: - operand->sel = 248; - operand->chan = 0; - break; - case C_SWIZZLE_1: - operand->sel = 249; - operand->chan = 0; - break; - default: - fprintf(stderr, "%s %d invalid swizzle %d\n", __FILE__, __LINE__, swizzle); - return -EINVAL; - } - return 0; -} - -static struct r600_shader_node *r600_shader_new_node(struct r600_shader *rshader, struct c_node *node) -{ - struct r600_shader_node *rnode; - - rnode = CALLOC_STRUCT(r600_shader_node); - if (rnode == NULL) - return NULL; - rnode->node = node; - LIST_INITHEAD(&rnode->vfetch); -fprintf(stderr, "------------------------ new node (%p %p)\n", &rnode->vfetch, rnode->vfetch.next); - LIST_INITHEAD(&rnode->alu); - LIST_ADDTAIL(&rnode->head, &rshader->nodes); - return rnode; -} - -static int r600_shader_add_vfetch(struct r600_shader *rshader, - struct r600_shader_node *node, - struct c_instruction *instruction) -{ - struct r600_shader_vfetch *vfetch; - struct r600_shader_node *rnode; - int r; - - if (instruction == NULL) - return 0; - if (instruction->op[0].opcode != C_OPCODE_VFETCH) - return 0; - if (!LIST_IS_EMPTY(&node->alu)) { - rnode = 
r600_shader_new_node(rshader, node->node); - if (rnode == NULL) - return -ENOMEM; - node = rnode; - } - vfetch = calloc(1, sizeof(struct r600_shader_vfetch)); - if (vfetch == NULL) - return -ENOMEM; - r = r600_shader_find_gpr(rshader, instruction->op[0].output.vector, 0, &vfetch->dst[0]); - if (r) - return r; - r = r600_shader_find_gpr(rshader, instruction->op[0].input[0].vector, 0, &vfetch->src[0]); - if (r) - return r; - r = r600_shader_find_gpr(rshader, instruction->op[0].input[1].vector, 0, &vfetch->src[1]); - if (r) - return r; - vfetch->dst[0].chan = C_SWIZZLE_X; - vfetch->dst[1].chan = C_SWIZZLE_Y; - vfetch->dst[2].chan = C_SWIZZLE_Z; - vfetch->dst[3].chan = C_SWIZZLE_W; - LIST_ADDTAIL(&vfetch->head, &node->vfetch); - node->nslot += 2; - return 0; -} - -static int r600_node_translate(struct r600_shader *rshader, struct c_node *node) -{ - struct c_instruction *instruction; - struct r600_shader_node *rnode; - int r; - - rnode = r600_shader_new_node(rshader, node); - if (rnode == NULL) - return -ENOMEM; - LIST_FOR_EACH_ENTRY(instruction, &node->insts, head) { - switch (instruction->op[0].opcode) { - case C_OPCODE_VFETCH: - r = r600_shader_add_vfetch(rshader, rnode, instruction); - if (r) { - fprintf(stderr, "%s %d vfetch failed\n", __func__, __LINE__); - return r; - } - break; - default: - r = r600_shader_alu_translate(rshader, rnode, instruction); - if (r) { - fprintf(stderr, "%s %d alu failed\n", __func__, __LINE__); - return r; - } - break; - } - } - return 0; -} - -int r600_shader_translate_rec(struct r600_shader *rshader, struct c_node *node) -{ - struct c_node_link *link; - int r; - - if (node->opcode == C_OPCODE_END) - return 0; - r = r600_node_translate(rshader, node); - if (r) - return r; - LIST_FOR_EACH_ENTRY(link, &node->childs, head) { - r = r600_shader_translate_rec(rshader, link->node); - if (r) - return r; - } - return 0; -} - -static struct r600_shader_alu *r600_shader_insert_alu(struct r600_shader *rshader, struct r600_shader_node *node) -{ - 
struct r600_shader_alu *alu; - - alu = CALLOC_STRUCT(r600_shader_alu); - if (alu == NULL) - return NULL; - alu->alu[0].inst = INST_NOP; - alu->alu[1].inst = INST_NOP; - alu->alu[2].inst = INST_NOP; - alu->alu[3].inst = INST_NOP; - alu->alu[4].inst = INST_NOP; - alu->alu[0].opcode = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; - alu->alu[1].opcode = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; - alu->alu[2].opcode = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; - alu->alu[3].opcode = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; - alu->alu[4].opcode = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; - alu->alu[1].dst.chan = 1; - alu->alu[2].dst.chan = 2; - alu->alu[3].dst.chan = 3; - LIST_ADDTAIL(&alu->head, &node->alu); - return alu; -} - -static int r600_shader_alu_translate(struct r600_shader *rshader, - struct r600_shader_node *node, - struct c_instruction *instruction) -{ - struct r600_shader_node *rnode; - struct r600_shader_alu *alu; - int i, j, r, litteral_lastcomp = -1; - - if (!LIST_IS_EMPTY(&node->vfetch)) { -fprintf(stderr, "------------------------ add node (%p %p)\n", &node->vfetch, node->vfetch.next); - rnode = r600_shader_new_node(rshader, node->node); - if (rnode == NULL) { - fprintf(stderr, "%s %d new node failed\n", __func__, __LINE__); - return -ENOMEM; - } - node = rnode; - } - - /* initialize alu */ - alu = r600_shader_insert_alu(rshader, node); - - /* check special operation like lit */ - - /* go through operation */ - for (i = 0; i < instruction->nop; i++) { - struct r600_alu_instruction *ainfo = &r600_alu_instruction[instruction->op[i].opcode]; - struct r600_instruction_info *iinfo = &r600_instruction_info[ainfo->instruction]; - unsigned comp; - - /* check that output is a valid component */ - comp = instruction->op[i].output.swizzle; - switch (comp) { - case C_SWIZZLE_X: - case C_SWIZZLE_Y: - case C_SWIZZLE_Z: - case C_SWIZZLE_W: - break; - case C_SWIZZLE_0: - case C_SWIZZLE_1: - default: - fprintf(stderr, "%s %d invalid output %d\n", __func__, __LINE__, comp); - return -EINVAL; - } - 
alu->alu[comp].inst = ainfo->instruction; - alu->alu[comp].opcode = iinfo->opcode; - alu->alu[comp].is_op3 = iinfo->is_op3; - for (j = 0; j < instruction->op[i].ninput; j++) { - r = r600_shader_find_gpr(rshader, instruction->op[i].input[j].vector, - instruction->op[i].input[j].swizzle, &alu->alu[comp].src[j]); - if (r) { - fprintf(stderr, "%s %d register failed\n", __FILE__, __LINE__); - return r; - } - if (instruction->op[i].input[j].vector->file == C_FILE_IMMEDIATE) { - r = instruction->op[i].input[j].swizzle; - switch (r) { - case C_SWIZZLE_X: - case C_SWIZZLE_Y: - case C_SWIZZLE_Z: - case C_SWIZZLE_W: - break; - case C_SWIZZLE_0: - case C_SWIZZLE_1: - default: - fprintf(stderr, "%s %d invalid input\n", __func__, __LINE__); - return -EINVAL; - } - alu->literal[r] = instruction->op[i].input[j].vector->channel[r]->value; - if (r > litteral_lastcomp) { - litteral_lastcomp = r; - } - } - } - r = r600_shader_find_gpr(rshader, instruction->op[i].output.vector, - instruction->op[i].output.swizzle, &alu->alu[comp].dst); - if (r) { - fprintf(stderr, "%s %d register failed\n", __FILE__, __LINE__); - return r; - } - } - switch (litteral_lastcomp) { - case 0: - case 1: - alu->nliteral = 2; - break; - case 2: - case 3: - alu->nliteral = 4; - break; - case -1: - default: - break; - } - for (i = 4; i >= 0; i--) { - if (alu->alu[i].inst != INST_NOP) { - alu->alu[i].last = 1; - alu->nalu = i + 1; - break; - } - } - return 0; -} - -void r600_shader_node_place(struct r600_shader *rshader) -{ - struct r600_shader_node *node, *nnode; - struct r600_shader_alu *alu, *nalu; - struct r600_shader_vfetch *vfetch, *nvfetch; - unsigned cf_id = 0, cf_addr = 0; - - rshader->ncf = 0; - rshader->nslot = 0; - LIST_FOR_EACH_ENTRY_SAFE(node, nnode, &rshader->nodes, head) { - LIST_FOR_EACH_ENTRY_SAFE(alu, nalu, &node->alu, head) { - node->nslot += alu->nalu; - node->nslot += alu->nliteral >> 1; - } - node->nfetch = 0; - LIST_FOR_EACH_ENTRY_SAFE(vfetch, nvfetch, &node->vfetch, head) { - node->nslot 
+= 2; - node->nfetch += 1; - } - if (!LIST_IS_EMPTY(&node->vfetch)) { - /* fetch node need to be 16 bytes aligned*/ - cf_addr += 1; - cf_addr &= 0xFFFFFFFEUL; - } - node->cf_id = cf_id; - node->cf_addr = cf_addr; - cf_id += 2; - cf_addr += node->nslot * 2; - rshader->ncf++; - } - rshader->nslot = cf_addr; - LIST_FOR_EACH_ENTRY_SAFE(node, nnode, &rshader->nodes, head) { - node->cf_addr += cf_id * 2; - } - rshader->ncf += rshader->cshader.files[C_FILE_OUTPUT].nvectors; - rshader->ndw = rshader->ncf * 2 + rshader->nslot * 2; -} - -int r600_shader_legalize(struct r600_shader *rshader) -{ - return 0; -} - - -static int r600_cshader_legalize_rec(struct c_shader *shader, struct c_node *node) -{ - struct c_node_link *link; - struct c_instruction *i, *n; - struct c_operand operand; - unsigned k, inst; - int r; - - LIST_FOR_EACH_ENTRY(i, &node->insts, head) { - for (k = 0; k < i->nop; k++) { - switch (i->op[k].opcode) { - case C_OPCODE_SLT: - i->op[k].opcode = C_OPCODE_SGT; - memcpy(&operand, &i->op[k].input[0], sizeof(struct c_operand)); - memcpy(&i->op[k].input[0], &i->op[k].input[1], sizeof(struct c_operand)); - memcpy(&i->op[k].input[1], &operand, sizeof(struct c_operand)); - break; - default: - break; - } - inst = r600_alu_instruction[i->op[k].opcode].instruction; - if (r600_instruction_info[inst].is_trans && k < (i->nop -1)) { - /* split trans opcode */ - n = CALLOC_STRUCT(c_instruction); - if (n == NULL) - return -ENOMEM; - for (n->nop = 0, k = k + 1; k < i->nop; k++, n->nop++) { - memcpy(&n->op[n->nop - 0], &i->op[k], sizeof(struct c_op)); - } - i->nop -= n->nop; - LIST_ADD(&n->head, &i->head); - } - } - } - LIST_FOR_EACH_ENTRY(link, &node->childs, head) { - r = r600_cshader_legalize_rec(shader, link->node); - if (r) { - return r; - } - } - return 0; -} - -int r600_cshader_legalize(struct c_shader *shader) -{ - return r600_cshader_legalize_rec(shader, &shader->entry); -} - - -struct r600_instruction_info r600_instruction_info[] = { - {INST_ADD, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, 0, 0}, - {INST_MUL, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, 0, 0}, - {INST_MUL_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE, 0, 0}, - {INST_MAX, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, 0, 0}, - {INST_MIN, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, 0, 0}, - {INST_MAX_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_DX10, 0, 0}, - {INST_MIN_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_DX10, 0, 0}, - {INST_SETE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, 0, 0}, - {INST_SETGT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, 0, 0}, - {INST_SETGE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, 0, 0}, - {INST_SETNE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, 0, 0}, - {INST_SETE_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_DX10, 0, 0}, - {INST_SETGT_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_DX10, 0, 0}, - {INST_SETGE_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_DX10, 0, 0}, - {INST_SETNE_DX10, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_DX10, 0, 0}, - {INST_FRACT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, 0, 0}, - {INST_TRUNC, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, 0, 0}, - {INST_CEIL, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, 0, 0}, - {INST_RNDNE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, 0, 0}, - {INST_FLOOR, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, 0, 0}, - {INST_MOVA, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA, 0, 0}, - {INST_MOVA_FLOOR, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR, 0, 0}, - {INST_MOVA_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, 0, 0}, - {INST_MOV, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, 0, 0}, - {INST_NOP, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 0, 0}, - {INST_PRED_SETGT_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT, 0, 0}, - {INST_PRED_SETGE_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT, 0, 0}, - {INST_PRED_SETE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE, 0, 0}, - {INST_PRED_SETGT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT, 0, 0}, - {INST_PRED_SETGE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE, 0, 0}, - {INST_PRED_SETNE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE, 0, 0}, - {INST_PRED_SET_INV, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV, 0, 0}, - {INST_PRED_SET_POP, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP, 0, 0}, - {INST_PRED_SET_CLR, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR, 0, 0}, - {INST_PRED_SET_RESTORE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE, 0, 0}, - {INST_PRED_SETE_PUSH, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH, 0, 0}, - {INST_PRED_SETGT_PUSH, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH, 0, 0}, - {INST_PRED_SETGE_PUSH, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH, 0, 0}, - {INST_PRED_SETNE_PUSH, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH, 0, 0}, - {INST_KILLE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE, 0, 0}, - {INST_KILLGT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, 0, 0}, - {INST_KILLGE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE, 0, 0}, - {INST_KILLNE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE, 0, 0}, - {INST_AND_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, 0, 0}, - {INST_OR_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, 0, 0}, - {INST_XOR_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, 0, 0}, - {INST_NOT_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, 0, 0}, - {INST_ADD_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, 0, 0}, - {INST_SUB_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, 0, 0}, - {INST_MAX_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, 0, 0}, - {INST_MIN_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, 0, 0}, - {INST_MAX_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, 0, 0}, - {INST_MIN_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, 0, 0}, - {INST_SETE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, 0, 0}, - {INST_SETGT_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, 0, 0}, - {INST_SETGE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, 0, 0}, - {INST_SETNE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, 0, 0}, - {INST_SETGT_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, 0, 0}, - {INST_SETGE_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, 0, 0}, - {INST_KILLGT_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT, 0, 0}, - {INST_KILLGE_UINT, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT, 0, 0}, - {INST_PRED_SETE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT, 0, 0}, - {INST_PRED_SETGT_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT, 0, 0}, - {INST_PRED_SETGE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT, 0, 0}, - {INST_PRED_SETNE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT, 0, 0}, - {INST_KILLE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT, 0, 0}, - {INST_KILLGT_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT, 0, 0}, - {INST_KILLGE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT, 0, 0}, - {INST_KILLNE_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT, 0, 0}, - {INST_PRED_SETE_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT, 0, 0}, - {INST_PRED_SETGT_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT, 0, 0}, - {INST_PRED_SETGE_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT, 0, 0}, - {INST_PRED_SETNE_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT, 0, 0}, - {INST_PRED_SETLT_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT, 0, 0}, - {INST_PRED_SETLE_PUSH_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT, 0, 0}, - {INST_DOT4, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, 0, 0}, - {INST_DOT4_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE, 0, 0}, - {INST_CUBE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE, 0, 0}, - {INST_MAX4, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4, 0, 0}, - {INST_MOVA_GPR_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT, 0, 0}, - {INST_EXP_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, 1, 0}, - {INST_LOG_CLAMPED, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED, 1, 0}, - {INST_LOG_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, 1, 0}, - {INST_RECIP_CLAMPED, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, 1, 0}, - {INST_RECIP_FF, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF, 1, 0}, - {INST_RECIP_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, 1, 0}, - {INST_RECIPSQRT_CLAMPED, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED, 1, 0}, - 
{INST_RECIPSQRT_FF, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF, 1, 0}, - {INST_RECIPSQRT_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, 1, 0}, - {INST_SQRT_IEEE, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE, 1, 0}, - {INST_FLT_TO_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, 1, 0}, - {INST_INT_TO_FLT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, 1, 0}, - {INST_UINT_TO_FLT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, 1, 0}, - {INST_SIN, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, 1, 0}, - {INST_COS, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, 1, 0}, - {INST_ASHR_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, 1, 0}, - {INST_LSHR_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, 1, 0}, - {INST_LSHL_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, 1, 0}, - {INST_MULLO_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT, 1, 0}, - {INST_MULHI_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT, 1, 0}, - {INST_MULLO_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, 1, 0}, - {INST_MULHI_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT, 1, 0}, - {INST_RECIP_INT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT, 1, 0}, - {INST_RECIP_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT, 1, 0}, - {INST_FLT_TO_UINT, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, 1, 0}, - {INST_MUL_LIT, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT, 1, 1}, - {INST_MUL_LIT_M2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2, 1, 1}, - {INST_MUL_LIT_M4, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4, 1, 1}, - {INST_MUL_LIT_D2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_D2, 1, 1}, - {INST_MULADD, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, 0, 1}, - {INST_MULADD_M2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_M2, 0, 1}, - {INST_MULADD_M4, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_M4, 0, 1}, - {INST_MULADD_D2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_D2, 0, 1}, - {INST_MULADD_IEEE, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE, 0, 1}, - {INST_MULADD_IEEE_M2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_M2, 0, 1}, - {INST_MULADD_IEEE_M4, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_M4, 0, 1}, - 
{INST_MULADD_IEEE_D2, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_IEEE_D2, 0, 1}, - {INST_CNDE, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE, 0, 1}, - {INST_CNDGT, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT, 0, 1}, - {INST_CNDGE, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE, 0, 1}, - {INST_CNDE_INT, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT, 0, 1}, - {INST_CNDGT_INT, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT, 0, 1}, - {INST_CNDGE_INT, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT, 0, 1}, -}; - -struct r600_alu_instruction r600_alu_instruction[C_OPCODE_LAST] = { - {C_OPCODE_NOP, INST_NOP}, - {C_OPCODE_MOV, INST_MOV}, - {C_OPCODE_LIT, INST_NOP}, - {C_OPCODE_RCP, INST_RECIP_IEEE}, - {C_OPCODE_RSQ, INST_RECIPSQRT_IEEE}, - {C_OPCODE_EXP, INST_EXP_IEEE}, - {C_OPCODE_LOG, INST_LOG_IEEE}, - {C_OPCODE_MUL, INST_MUL}, - {C_OPCODE_ADD, INST_ADD}, - {C_OPCODE_DP3, INST_DOT4}, - {C_OPCODE_DP4, INST_DOT4}, - {C_OPCODE_DST, INST_NOP}, - {C_OPCODE_MIN, INST_MIN}, - {C_OPCODE_MAX, INST_MAX}, - {C_OPCODE_SLT, INST_NOP}, - {C_OPCODE_SGE, INST_NOP}, - {C_OPCODE_MAD, INST_MULADD}, - {C_OPCODE_SUB, INST_COUNT}, - {C_OPCODE_LRP, INST_NOP}, - {C_OPCODE_CND, INST_NOP}, - {20, INST_NOP}, - {C_OPCODE_DP2A, INST_NOP}, - {22, INST_NOP}, - {23, INST_NOP}, - {C_OPCODE_FRC, INST_NOP}, - {C_OPCODE_CLAMP, INST_NOP}, - {C_OPCODE_FLR, INST_NOP}, - {C_OPCODE_ROUND, INST_NOP}, - {C_OPCODE_EX2, INST_NOP}, - {C_OPCODE_LG2, INST_NOP}, - {C_OPCODE_POW, INST_NOP}, - {C_OPCODE_XPD, INST_NOP}, - {32, INST_NOP}, - {C_OPCODE_ABS, INST_COUNT}, - {C_OPCODE_RCC, INST_NOP}, - {C_OPCODE_DPH, INST_NOP}, - {C_OPCODE_COS, INST_COS}, - {C_OPCODE_DDX, INST_NOP}, - {C_OPCODE_DDY, INST_NOP}, - {C_OPCODE_KILP, INST_NOP}, - {C_OPCODE_PK2H, INST_NOP}, - {C_OPCODE_PK2US, INST_NOP}, - {C_OPCODE_PK4B, INST_NOP}, - {C_OPCODE_PK4UB, INST_NOP}, - {C_OPCODE_RFL, INST_NOP}, - {C_OPCODE_SEQ, INST_NOP}, - {C_OPCODE_SFL, INST_NOP}, - {C_OPCODE_SGT, INST_SETGT}, - {C_OPCODE_SIN, INST_SIN}, - {C_OPCODE_SLE, INST_NOP}, - {C_OPCODE_SNE, INST_NOP}, - {C_OPCODE_STR, INST_NOP}, - 
{C_OPCODE_TEX, INST_NOP}, - {C_OPCODE_TXD, INST_NOP}, - {C_OPCODE_TXP, INST_NOP}, - {C_OPCODE_UP2H, INST_NOP}, - {C_OPCODE_UP2US, INST_NOP}, - {C_OPCODE_UP4B, INST_NOP}, - {C_OPCODE_UP4UB, INST_NOP}, - {C_OPCODE_X2D, INST_NOP}, - {C_OPCODE_ARA, INST_NOP}, - {C_OPCODE_ARR, INST_NOP}, - {C_OPCODE_BRA, INST_NOP}, - {C_OPCODE_CAL, INST_NOP}, - {C_OPCODE_RET, INST_NOP}, - {C_OPCODE_SSG, INST_NOP}, - {C_OPCODE_CMP, INST_NOP}, - {C_OPCODE_SCS, INST_NOP}, - {C_OPCODE_TXB, INST_NOP}, - {C_OPCODE_NRM, INST_NOP}, - {C_OPCODE_DIV, INST_NOP}, - {C_OPCODE_DP2, INST_NOP}, - {C_OPCODE_TXL, INST_NOP}, - {C_OPCODE_BRK, INST_NOP}, - {C_OPCODE_IF, INST_NOP}, - {C_OPCODE_BGNFOR, INST_NOP}, - {C_OPCODE_REP, INST_NOP}, - {C_OPCODE_ELSE, INST_NOP}, - {C_OPCODE_ENDIF, INST_NOP}, - {C_OPCODE_ENDFOR, INST_NOP}, - {C_OPCODE_ENDREP, INST_NOP}, - {C_OPCODE_PUSHA, INST_NOP}, - {C_OPCODE_POPA, INST_NOP}, - {C_OPCODE_CEIL, INST_NOP}, - {C_OPCODE_I2F, INST_NOP}, - {C_OPCODE_NOT, INST_NOP}, - {C_OPCODE_TRUNC, INST_NOP}, - {C_OPCODE_SHL, INST_NOP}, - {88, INST_NOP}, - {C_OPCODE_AND, INST_NOP}, - {C_OPCODE_OR, INST_NOP}, - {C_OPCODE_MOD, INST_NOP}, - {C_OPCODE_XOR, INST_NOP}, - {C_OPCODE_SAD, INST_NOP}, - {C_OPCODE_TXF, INST_NOP}, - {C_OPCODE_TXQ, INST_NOP}, - {C_OPCODE_CONT, INST_NOP}, - {C_OPCODE_EMIT, INST_NOP}, - {C_OPCODE_ENDPRIM, INST_NOP}, - {C_OPCODE_BGNLOOP, INST_NOP}, - {C_OPCODE_BGNSUB, INST_NOP}, - {C_OPCODE_ENDLOOP, INST_NOP}, - {C_OPCODE_ENDSUB, INST_NOP}, - {103, INST_NOP}, - {104, INST_NOP}, - {105, INST_NOP}, - {106, INST_NOP}, - {107, INST_NOP}, - {108, INST_NOP}, - {109, INST_NOP}, - {110, INST_NOP}, - {111, INST_NOP}, - {C_OPCODE_NRM4, INST_NOP}, - {C_OPCODE_CALLNZ, INST_NOP}, - {C_OPCODE_IFC, INST_NOP}, - {C_OPCODE_BREAKC, INST_NOP}, - {C_OPCODE_KIL, INST_NOP}, - {C_OPCODE_END, INST_NOP}, - {118, INST_NOP}, - {C_OPCODE_F2I, INST_NOP}, - {C_OPCODE_IDIV, INST_NOP}, - {C_OPCODE_IMAX, INST_NOP}, - {C_OPCODE_IMIN, INST_NOP}, - {C_OPCODE_INEG, INST_NOP}, - {C_OPCODE_ISGE, INST_NOP}, - 
{C_OPCODE_ISHR, INST_NOP}, - {C_OPCODE_ISLT, INST_NOP}, - {C_OPCODE_F2U, INST_NOP}, - {C_OPCODE_U2F, INST_NOP}, - {C_OPCODE_UADD, INST_NOP}, - {C_OPCODE_UDIV, INST_NOP}, - {C_OPCODE_UMAD, INST_NOP}, - {C_OPCODE_UMAX, INST_NOP}, - {C_OPCODE_UMIN, INST_NOP}, - {C_OPCODE_UMOD, INST_NOP}, - {C_OPCODE_UMUL, INST_NOP}, - {C_OPCODE_USEQ, INST_NOP}, - {C_OPCODE_USGE, INST_NOP}, - {C_OPCODE_USHR, INST_NOP}, - {C_OPCODE_USLT, INST_NOP}, - {C_OPCODE_USNE, INST_NOP}, - {C_OPCODE_SWITCH, INST_NOP}, - {C_OPCODE_CASE, INST_NOP}, - {C_OPCODE_DEFAULT, INST_NOP}, - {C_OPCODE_ENDSWITCH, INST_NOP}, - {C_OPCODE_VFETCH, INST_NOP}, - {C_OPCODE_ENTRY, INST_NOP}, - {C_OPCODE_ARL, INST_NOP}, -}; - - -static int r600_shader_alu_bytecode(struct r600_shader *rshader, - struct r600_shader_node *rnode, - struct r600_shader_inst *alu, - unsigned *cid) -{ - unsigned id = *cid; - - /* don't replace gpr by pv or ps for destination register */ - if (alu->is_op3) { - rshader->bcode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | - S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | - S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | - S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | - S_SQ_ALU_WORD0_LAST(alu->last); - rshader->bcode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | - S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | - S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) | - S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) | - S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) | - S_SQ_ALU_WORD1_OP3_ALU_INST(alu->opcode) | - S_SQ_ALU_WORD1_BANK_SWIZZLE(0); - } else { - rshader->bcode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | - S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | - S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) | - S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | - S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | - S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) | - S_SQ_ALU_WORD0_LAST(alu->last); - rshader->bcode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | - S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | - 
S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | - S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | - S_SQ_ALU_WORD1_OP2_WRITE_MASK(1) | - S_SQ_ALU_WORD1_OP2_ALU_INST(alu->opcode) | - S_SQ_ALU_WORD1_BANK_SWIZZLE(0); - } - *cid = id; - return 0; -} - -int r6xx_shader_alu_translate(struct r600_shader *rshader, - struct r600_shader_node *rnode, - unsigned *cid) -{ - struct r600_shader_alu *alu; - unsigned id = *cid; - int i; - int r = 0; - LIST_FOR_EACH_ENTRY(alu, &rnode->alu, head) { - for (i = 0; i < alu->nalu; i++) { - r = r600_shader_alu_bytecode(rshader, rnode, &alu->alu[i], &id); - if (r) - goto out; - } - for (i = 0; i < alu->nliteral; i++) { - rshader->bcode[id++] = alu->literal[i]; - } - } -out: - *cid = id; - return r; -} diff --git a/src/gallium/drivers/r600/r600_compiler_r700.c b/src/gallium/drivers/r600/r600_compiler_r700.c deleted file mode 100644 index 0b43942866..0000000000 --- a/src/gallium/drivers/r600/r600_compiler_r700.c +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#include -#include -#include -#include -#include -#include "r600_context.h" -#include "r700_sq.h" - -static int r700_shader_cf_node_bytecode(struct r600_shader *rshader, - struct r600_shader_node *rnode, - unsigned *cid) -{ - unsigned id = *cid; - - if (rnode->nfetch) { - rshader->bcode[id++] = S_SQ_CF_WORD0_ADDR(rnode->cf_addr >> 1); - rshader->bcode[id++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(rnode->nfetch - 1); - } else { - rshader->bcode[id++] = S_SQ_CF_ALU_WORD0_ADDR(rnode->cf_addr >> 1); - rshader->bcode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) | - S_SQ_CF_ALU_WORD1_BARRIER(1) | - S_SQ_CF_ALU_WORD1_COUNT(rnode->nslot - 1); - } - *cid = id; - return 0; -} - -static int r700_shader_cf_output_bytecode(struct r600_shader *rshader, - struct c_vector *v, - unsigned *cid, - unsigned end) -{ - struct r600_shader_operand out; - unsigned id = *cid; - int r; - - r = r600_shader_find_gpr(rshader, v, 0, &out); - if (r) - return r; - rshader->bcode[id + 0] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(out.sel) | - S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(3); - rshader->bcode[id + 1] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(0) | - S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(1) | - S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(2) | - S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(3) | - S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(1) | - S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE) | - S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(end); - switch (v->name) { - case C_SEMANTIC_POSITION: - rshader->bcode[id + 0] |= S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(60) | - 
S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS); - break; - case C_SEMANTIC_COLOR: - if (rshader->cshader.type == C_PROGRAM_TYPE_VS) { - rshader->output[rshader->noutput].gpr = out.sel; - rshader->output[rshader->noutput].sid = v->sid; - rshader->output[rshader->noutput].name = v->name; - rshader->bcode[id + 0] |= S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(rshader->noutput++) | - S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM); - } else { - rshader->bcode[id + 0] |= S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(0) | - S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL); - } - break; - case C_SEMANTIC_GENERIC: - rshader->output[rshader->noutput].gpr = out.sel; - rshader->output[rshader->noutput].sid = v->sid; - rshader->output[rshader->noutput].name = v->name; - rshader->bcode[id + 0] |= S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(rshader->noutput++) | - S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM); - break; - default: - fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - *cid = id + 2; - return 0; -} - -static int r700_shader_alu_bytecode(struct r600_shader *rshader, - struct r600_shader_node *rnode, - struct r600_shader_inst *alu, - unsigned *cid) -{ - unsigned id = *cid; - - /* don't replace gpr by pv or ps for destination register */ - if (alu->is_op3) { - rshader->bcode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | - S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | - S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | - S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | - S_SQ_ALU_WORD0_LAST(alu->last); - rshader->bcode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | - S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | - S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) | - S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) | - S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) | - S_SQ_ALU_WORD1_OP3_ALU_INST(alu->opcode) | - S_SQ_ALU_WORD1_BANK_SWIZZLE(0); - } else { - 
rshader->bcode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | - S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | - S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) | - S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | - S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | - S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) | - S_SQ_ALU_WORD0_LAST(alu->last); - rshader->bcode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | - S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | - S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | - S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | - S_SQ_ALU_WORD1_OP2_WRITE_MASK(1) | - S_SQ_ALU_WORD1_OP2_ALU_INST(alu->opcode) | - S_SQ_ALU_WORD1_BANK_SWIZZLE(0); - } - *cid = id; - return 0; -} - -static int r700_shader_alu_translate(struct r600_shader *rshader, - struct r600_shader_node *rnode, - unsigned *cid) - -{ - struct r600_shader_alu *alu; - unsigned id = *cid; - int i; - int r = 0; - LIST_FOR_EACH_ENTRY(alu, &rnode->alu, head) { - for (i = 0; i < alu->nalu; i++) { - r = r700_shader_alu_bytecode(rshader, rnode, &alu->alu[i], &id); - if (r) - goto out; - } - for (i = 0; i < alu->nliteral; i++) { - rshader->bcode[id++] = alu->literal[i]; - } - } - out: - *cid = id; - return r; -} - -int r700_shader_translate(struct r600_shader *rshader) -{ - struct c_shader *shader = &rshader->cshader; - struct r600_shader_node *rnode; - struct r600_shader_vfetch *vfetch; - struct c_vector *v; - unsigned id, end; - int r; - - r = r600_shader_register(rshader); - if (r) { - fprintf(stderr, "%s %d register allocation failed\n", __FILE__, __LINE__); - return r; - } - r = r600_shader_translate_rec(rshader, &shader->entry); - if (r) { - fprintf(stderr, "%s %d translation failed\n", __FILE__, __LINE__); - return r; - } - r = r600_shader_legalize(rshader); - if (r) { - fprintf(stderr, "%s %d legalize failed\n", __FILE__, __LINE__); - return r; - } - r600_shader_node_place(rshader); - rshader->bcode = malloc(rshader->ndw * 4); - if (rshader->bcode == NULL) - return -ENOMEM; - LIST_FOR_EACH_ENTRY(rnode, 
&rshader->nodes, head) { - id = rnode->cf_addr; - LIST_FOR_EACH_ENTRY(vfetch, &rnode->vfetch, head) { - r = r600_shader_vfetch_bytecode(rshader, rnode, vfetch, &id); - if (r) - return r; - } - if (rshader->r6xx_compile) - r = r6xx_shader_alu_translate(rshader, rnode, &id); - else - r = r700_shader_alu_translate(rshader, rnode, &id); - if (r) - return r; - } - id = 0; - LIST_FOR_EACH_ENTRY(rnode, &rshader->nodes, head) { - r = r700_shader_cf_node_bytecode(rshader, rnode, &id); - if (r) - return r; - } - LIST_FOR_EACH_ENTRY(v, &rshader->cshader.files[C_FILE_OUTPUT].vectors, head) { - end = 0; - if (v->head.next == &rshader->cshader.files[C_FILE_OUTPUT].vectors) - end = 1; - r = r700_shader_cf_output_bytecode(rshader, v, &id, end); - if (r) - return r; - } - LIST_FOR_EACH_ENTRY(v, &rshader->cshader.files[C_FILE_INPUT].vectors, head) { - rshader->input[rshader->ninput].gpr = rshader->ninput; - rshader->input[rshader->ninput].sid = v->sid; - rshader->input[rshader->ninput].name = v->name; - rshader->ninput++; - } - return 0; -} diff --git a/src/gallium/drivers/r600/r600_compiler_tgsi.c b/src/gallium/drivers/r600/r600_compiler_tgsi.c deleted file mode 100644 index 172cf154a3..0000000000 --- a/src/gallium/drivers/r600/r600_compiler_tgsi.c +++ /dev/null @@ -1,730 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#include -#include -#include -#include -#include -#include -#include -#include "r600_shader.h" -#include "r600_context.h" - -struct tgsi_shader { - struct c_vector **v[TGSI_FILE_COUNT]; - struct tgsi_shader_info info; - struct tgsi_parse_context parser; - const struct tgsi_token *tokens; - struct c_shader *shader; - struct c_node *node; -}; - -static unsigned tgsi_file_to_c_file(unsigned file); -static unsigned tgsi_sname_to_c_sname(unsigned sname); -static int tgsi_opcode_to_c_opcode(unsigned opcode, unsigned *copcode); - -static int tgsi_shader_init(struct tgsi_shader *ts, - const struct tgsi_token *tokens, - struct c_shader *shader) -{ - int i; - - ts->shader = shader; - ts->tokens = tokens; - tgsi_scan_shader(ts->tokens, &ts->info); - tgsi_parse_init(&ts->parser, ts->tokens); - /* initialize to NULL in case of error */ - for (i = 0; i < C_FILE_COUNT; i++) { - ts->v[i] = NULL; - } - for (i = 0; i < TGSI_FILE_COUNT; i++) { - if (ts->info.file_count[i] > 0) { - ts->v[i] = calloc(ts->info.file_count[i], sizeof(void*)); - if (ts->v[i] == NULL) { - fprintf(stderr, "%s:%d unsupported %d %d\n", __func__, __LINE__, i, ts->info.file_count[i]); - return -ENOMEM; - } - } - } - return 0; -} - -static void tgsi_shader_destroy(struct tgsi_shader *ts) -{ - int i; - - for (i = 0; i < TGSI_FILE_COUNT; i++) { - free(ts->v[i]); - } - tgsi_parse_free(&ts->parser); -} - -static int ntransform_declaration(struct tgsi_shader *ts) -{ - struct tgsi_full_declaration *fd = 
&ts->parser.FullToken.FullDeclaration; - struct c_vector *v; - unsigned file; - unsigned name; - int sid; - int i; - - if (fd->Declaration.Dimension) { - fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - for (i = fd->Range.First ; i <= fd->Range.Last; i++) { - sid = i; - name = C_SEMANTIC_GENERIC; - file = tgsi_file_to_c_file(fd->Declaration.File); - if (file == TGSI_FILE_NULL) { - fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - if (fd->Declaration.Semantic) { - name = tgsi_sname_to_c_sname(fd->Semantic.Name); - sid = fd->Semantic.Index; - } - v = c_shader_vector_new(ts->shader, file, name, sid); - if (v == NULL) { - fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); - return -ENOMEM; - } - ts->v[fd->Declaration.File][i] = v; - } - return 0; -} - -static int ntransform_immediate(struct tgsi_shader *ts) -{ - struct tgsi_full_immediate *fd = &ts->parser.FullToken.FullImmediate; - struct c_vector *v; - unsigned file; - unsigned name; - - if (fd->Immediate.DataType != TGSI_IMM_FLOAT32) { - fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - name = C_SEMANTIC_GENERIC; - file = C_FILE_IMMEDIATE; - v = c_shader_vector_new(ts->shader, file, name, 0); - if (v == NULL) { - fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); - return -ENOMEM; - } - v->channel[0]->value = fd->u[0].Uint; - v->channel[1]->value = fd->u[1].Uint; - v->channel[2]->value = fd->u[2].Uint; - v->channel[3]->value = fd->u[3].Uint; - ts->v[TGSI_FILE_IMMEDIATE][0] = v; - return 0; -} - -static int ntransform_instruction(struct tgsi_shader *ts) -{ - struct tgsi_full_instruction *fi = &ts->parser.FullToken.FullInstruction; - struct c_shader *shader = ts->shader; - struct c_instruction instruction; - unsigned opcode; - int i, j, r; - - if (fi->Instruction.NumDstRegs > 1) { - fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - if (fi->Instruction.Saturate) { - 
fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - if (fi->Instruction.Predicate) { - fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - if (fi->Instruction.Label) { - fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - if (fi->Instruction.Texture) { - fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - for (i = 0; i < fi->Instruction.NumSrcRegs; i++) { - if (fi->Src[i].Register.Indirect || - fi->Src[i].Register.Dimension || - fi->Src[i].Register.Absolute) { - fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - } - for (i = 0; i < fi->Instruction.NumDstRegs; i++) { - if (fi->Dst[i].Register.Indirect || fi->Dst[i].Register.Dimension) { - fprintf(stderr, "%s %d unsupported\n", __func__, __LINE__); - return -EINVAL; - } - } - r = tgsi_opcode_to_c_opcode(fi->Instruction.Opcode, &opcode); - if (r) { - fprintf(stderr, "%s:%d unsupported\n", __func__, __LINE__); - return r; - } - if (opcode == C_OPCODE_END) { - return c_node_cfg_link(ts->node, &shader->end); - } - /* FIXME add flow instruction handling */ - memset(&instruction, 0, sizeof(struct c_instruction)); - instruction.nop = 0; - for (j = 0; j < 4; j++) { - instruction.op[instruction.nop].opcode = opcode; - instruction.op[instruction.nop].ninput = fi->Instruction.NumSrcRegs; - for (i = 0; i < fi->Instruction.NumSrcRegs; i++) { - instruction.op[instruction.nop].input[i].vector = ts->v[fi->Src[i].Register.File][fi->Src[i].Register.Index]; - switch (j) { - case 0: - instruction.op[instruction.nop].input[i].swizzle = fi->Src[i].Register.SwizzleX; - break; - case 1: - instruction.op[instruction.nop].input[i].swizzle = fi->Src[i].Register.SwizzleY; - break; - case 2: - instruction.op[instruction.nop].input[i].swizzle = fi->Src[i].Register.SwizzleZ; - break; - case 3: - instruction.op[instruction.nop].input[i].swizzle = fi->Src[i].Register.SwizzleW; - break; - 
default: - return -EINVAL; - } - } - instruction.op[instruction.nop].output.vector = ts->v[fi->Dst[0].Register.File][fi->Dst[0].Register.Index]; - switch (j) { - case 0: - instruction.op[instruction.nop].output.swizzle = (fi->Dst[0].Register.WriteMask & 0x1) ? C_SWIZZLE_X : C_SWIZZLE_D; - break; - case 1: - instruction.op[instruction.nop].output.swizzle = (fi->Dst[0].Register.WriteMask & 0x1) ? C_SWIZZLE_Y : C_SWIZZLE_D; - break; - case 2: - instruction.op[instruction.nop].output.swizzle = (fi->Dst[0].Register.WriteMask & 0x1) ? C_SWIZZLE_Z : C_SWIZZLE_D; - break; - case 3: - instruction.op[instruction.nop].output.swizzle = (fi->Dst[0].Register.WriteMask & 0x1) ? C_SWIZZLE_W : C_SWIZZLE_D; - break; - default: - return -EINVAL; - } - instruction.nop++; - } - return c_node_add_new_instruction(ts->node, &instruction); -} - -int c_shader_from_tgsi(struct c_shader *shader, unsigned type, - const struct tgsi_token *tokens) -{ - struct tgsi_shader ts; - int r = 0; - - c_shader_init(shader, type); - r = tgsi_shader_init(&ts, tokens, shader); - if (r) - goto out_err; - ts.shader = shader; - ts.node = &shader->entry; - while (!tgsi_parse_end_of_tokens(&ts.parser)) { - tgsi_parse_token(&ts.parser); - switch (ts.parser.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_IMMEDIATE: - r = ntransform_immediate(&ts); - if (r) - goto out_err; - break; - case TGSI_TOKEN_TYPE_DECLARATION: - r = ntransform_declaration(&ts); - if (r) - goto out_err; - break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - r = ntransform_instruction(&ts); - if (r) - goto out_err; - break; - default: - r = -EINVAL; - goto out_err; - } - } - tgsi_shader_destroy(&ts); - return 0; -out_err: - c_shader_destroy(shader); - tgsi_shader_destroy(&ts); - return r; -} - -static unsigned tgsi_file_to_c_file(unsigned file) -{ - switch (file) { - case TGSI_FILE_CONSTANT: - return C_FILE_CONSTANT; - case TGSI_FILE_INPUT: - return C_FILE_INPUT; - case TGSI_FILE_OUTPUT: - return C_FILE_OUTPUT; - case TGSI_FILE_TEMPORARY: - return 
C_FILE_TEMPORARY; - case TGSI_FILE_SAMPLER: - return C_FILE_SAMPLER; - case TGSI_FILE_ADDRESS: - return C_FILE_ADDRESS; - case TGSI_FILE_IMMEDIATE: - return C_FILE_IMMEDIATE; - case TGSI_FILE_PREDICATE: - return C_FILE_PREDICATE; - case TGSI_FILE_SYSTEM_VALUE: - return C_FILE_SYSTEM_VALUE; - case TGSI_FILE_NULL: - return C_FILE_NULL; - default: - fprintf(stderr, "%s:%d unsupported file %d\n", __func__, __LINE__, file); - return C_FILE_NULL; - } -} - -static unsigned tgsi_sname_to_c_sname(unsigned sname) -{ - switch (sname) { - case TGSI_SEMANTIC_POSITION: - return C_SEMANTIC_POSITION; - case TGSI_SEMANTIC_COLOR: - return C_SEMANTIC_COLOR; - case TGSI_SEMANTIC_BCOLOR: - return C_SEMANTIC_BCOLOR; - case TGSI_SEMANTIC_FOG: - return C_SEMANTIC_FOG; - case TGSI_SEMANTIC_PSIZE: - return C_SEMANTIC_PSIZE; - case TGSI_SEMANTIC_GENERIC: - return C_SEMANTIC_GENERIC; - case TGSI_SEMANTIC_NORMAL: - return C_SEMANTIC_NORMAL; - case TGSI_SEMANTIC_FACE: - return C_SEMANTIC_FACE; - case TGSI_SEMANTIC_EDGEFLAG: - return C_SEMANTIC_EDGEFLAG; - case TGSI_SEMANTIC_PRIMID: - return C_SEMANTIC_PRIMID; - case TGSI_SEMANTIC_INSTANCEID: - return C_SEMANTIC_INSTANCEID; - default: - return C_SEMANTIC_GENERIC; - } -} - -static int tgsi_opcode_to_c_opcode(unsigned opcode, unsigned *copcode) -{ - switch (opcode) { - case TGSI_OPCODE_MOV: - *copcode = C_OPCODE_MOV; - return 0; - case TGSI_OPCODE_MUL: - *copcode = C_OPCODE_MUL; - return 0; - case TGSI_OPCODE_MAD: - *copcode = C_OPCODE_MAD; - return 0; - case TGSI_OPCODE_END: - *copcode = C_OPCODE_END; - return 0; - case TGSI_OPCODE_ARL: - *copcode = C_OPCODE_ARL; - return 0; - case TGSI_OPCODE_LIT: - *copcode = C_OPCODE_LIT; - return 0; - case TGSI_OPCODE_RCP: - *copcode = C_OPCODE_RCP; - return 0; - case TGSI_OPCODE_RSQ: - *copcode = C_OPCODE_RSQ; - return 0; - case TGSI_OPCODE_EXP: - *copcode = C_OPCODE_EXP; - return 0; - case TGSI_OPCODE_LOG: - *copcode = C_OPCODE_LOG; - return 0; - case TGSI_OPCODE_ADD: - *copcode = C_OPCODE_ADD; - return 0; 
- case TGSI_OPCODE_DP3: - *copcode = C_OPCODE_DP3; - return 0; - case TGSI_OPCODE_DP4: - *copcode = C_OPCODE_DP4; - return 0; - case TGSI_OPCODE_DST: - *copcode = C_OPCODE_DST; - return 0; - case TGSI_OPCODE_MIN: - *copcode = C_OPCODE_MIN; - return 0; - case TGSI_OPCODE_MAX: - *copcode = C_OPCODE_MAX; - return 0; - case TGSI_OPCODE_SLT: - *copcode = C_OPCODE_SLT; - return 0; - case TGSI_OPCODE_SGE: - *copcode = C_OPCODE_SGE; - return 0; - case TGSI_OPCODE_SUB: - *copcode = C_OPCODE_SUB; - return 0; - case TGSI_OPCODE_LRP: - *copcode = C_OPCODE_LRP; - return 0; - case TGSI_OPCODE_CND: - *copcode = C_OPCODE_CND; - return 0; - case TGSI_OPCODE_DP2A: - *copcode = C_OPCODE_DP2A; - return 0; - case TGSI_OPCODE_FRC: - *copcode = C_OPCODE_FRC; - return 0; - case TGSI_OPCODE_CLAMP: - *copcode = C_OPCODE_CLAMP; - return 0; - case TGSI_OPCODE_FLR: - *copcode = C_OPCODE_FLR; - return 0; - case TGSI_OPCODE_ROUND: - *copcode = C_OPCODE_ROUND; - return 0; - case TGSI_OPCODE_EX2: - *copcode = C_OPCODE_EX2; - return 0; - case TGSI_OPCODE_LG2: - *copcode = C_OPCODE_LG2; - return 0; - case TGSI_OPCODE_POW: - *copcode = C_OPCODE_POW; - return 0; - case TGSI_OPCODE_XPD: - *copcode = C_OPCODE_XPD; - return 0; - case TGSI_OPCODE_ABS: - *copcode = C_OPCODE_ABS; - return 0; - case TGSI_OPCODE_RCC: - *copcode = C_OPCODE_RCC; - return 0; - case TGSI_OPCODE_DPH: - *copcode = C_OPCODE_DPH; - return 0; - case TGSI_OPCODE_COS: - *copcode = C_OPCODE_COS; - return 0; - case TGSI_OPCODE_DDX: - *copcode = C_OPCODE_DDX; - return 0; - case TGSI_OPCODE_DDY: - *copcode = C_OPCODE_DDY; - return 0; - case TGSI_OPCODE_KILP: - *copcode = C_OPCODE_KILP; - return 0; - case TGSI_OPCODE_PK2H: - *copcode = C_OPCODE_PK2H; - return 0; - case TGSI_OPCODE_PK2US: - *copcode = C_OPCODE_PK2US; - return 0; - case TGSI_OPCODE_PK4B: - *copcode = C_OPCODE_PK4B; - return 0; - case TGSI_OPCODE_PK4UB: - *copcode = C_OPCODE_PK4UB; - return 0; - case TGSI_OPCODE_RFL: - *copcode = C_OPCODE_RFL; - return 0; - case 
TGSI_OPCODE_SEQ: - *copcode = C_OPCODE_SEQ; - return 0; - case TGSI_OPCODE_SFL: - *copcode = C_OPCODE_SFL; - return 0; - case TGSI_OPCODE_SGT: - *copcode = C_OPCODE_SGT; - return 0; - case TGSI_OPCODE_SIN: - *copcode = C_OPCODE_SIN; - return 0; - case TGSI_OPCODE_SLE: - *copcode = C_OPCODE_SLE; - return 0; - case TGSI_OPCODE_SNE: - *copcode = C_OPCODE_SNE; - return 0; - case TGSI_OPCODE_STR: - *copcode = C_OPCODE_STR; - return 0; - case TGSI_OPCODE_TEX: - *copcode = C_OPCODE_TEX; - return 0; - case TGSI_OPCODE_TXD: - *copcode = C_OPCODE_TXD; - return 0; - case TGSI_OPCODE_TXP: - *copcode = C_OPCODE_TXP; - return 0; - case TGSI_OPCODE_UP2H: - *copcode = C_OPCODE_UP2H; - return 0; - case TGSI_OPCODE_UP2US: - *copcode = C_OPCODE_UP2US; - return 0; - case TGSI_OPCODE_UP4B: - *copcode = C_OPCODE_UP4B; - return 0; - case TGSI_OPCODE_UP4UB: - *copcode = C_OPCODE_UP4UB; - return 0; - case TGSI_OPCODE_X2D: - *copcode = C_OPCODE_X2D; - return 0; - case TGSI_OPCODE_ARA: - *copcode = C_OPCODE_ARA; - return 0; - case TGSI_OPCODE_ARR: - *copcode = C_OPCODE_ARR; - return 0; - case TGSI_OPCODE_BRA: - *copcode = C_OPCODE_BRA; - return 0; - case TGSI_OPCODE_CAL: - *copcode = C_OPCODE_CAL; - return 0; - case TGSI_OPCODE_RET: - *copcode = C_OPCODE_RET; - return 0; - case TGSI_OPCODE_SSG: - *copcode = C_OPCODE_SSG; - return 0; - case TGSI_OPCODE_CMP: - *copcode = C_OPCODE_CMP; - return 0; - case TGSI_OPCODE_SCS: - *copcode = C_OPCODE_SCS; - return 0; - case TGSI_OPCODE_TXB: - *copcode = C_OPCODE_TXB; - return 0; - case TGSI_OPCODE_NRM: - *copcode = C_OPCODE_NRM; - return 0; - case TGSI_OPCODE_DIV: - *copcode = C_OPCODE_DIV; - return 0; - case TGSI_OPCODE_DP2: - *copcode = C_OPCODE_DP2; - return 0; - case TGSI_OPCODE_TXL: - *copcode = C_OPCODE_TXL; - return 0; - case TGSI_OPCODE_BRK: - *copcode = C_OPCODE_BRK; - return 0; - case TGSI_OPCODE_IF: - *copcode = C_OPCODE_IF; - return 0; - case TGSI_OPCODE_ELSE: - *copcode = C_OPCODE_ELSE; - return 0; - case TGSI_OPCODE_ENDIF: - *copcode = 
C_OPCODE_ENDIF; - return 0; - case TGSI_OPCODE_PUSHA: - *copcode = C_OPCODE_PUSHA; - return 0; - case TGSI_OPCODE_POPA: - *copcode = C_OPCODE_POPA; - return 0; - case TGSI_OPCODE_CEIL: - *copcode = C_OPCODE_CEIL; - return 0; - case TGSI_OPCODE_I2F: - *copcode = C_OPCODE_I2F; - return 0; - case TGSI_OPCODE_NOT: - *copcode = C_OPCODE_NOT; - return 0; - case TGSI_OPCODE_TRUNC: - *copcode = C_OPCODE_TRUNC; - return 0; - case TGSI_OPCODE_SHL: - *copcode = C_OPCODE_SHL; - return 0; - case TGSI_OPCODE_AND: - *copcode = C_OPCODE_AND; - return 0; - case TGSI_OPCODE_OR: - *copcode = C_OPCODE_OR; - return 0; - case TGSI_OPCODE_MOD: - *copcode = C_OPCODE_MOD; - return 0; - case TGSI_OPCODE_XOR: - *copcode = C_OPCODE_XOR; - return 0; - case TGSI_OPCODE_SAD: - *copcode = C_OPCODE_SAD; - return 0; - case TGSI_OPCODE_TXF: - *copcode = C_OPCODE_TXF; - return 0; - case TGSI_OPCODE_TXQ: - *copcode = C_OPCODE_TXQ; - return 0; - case TGSI_OPCODE_CONT: - *copcode = C_OPCODE_CONT; - return 0; - case TGSI_OPCODE_EMIT: - *copcode = C_OPCODE_EMIT; - return 0; - case TGSI_OPCODE_ENDPRIM: - *copcode = C_OPCODE_ENDPRIM; - return 0; - case TGSI_OPCODE_BGNLOOP: - *copcode = C_OPCODE_BGNLOOP; - return 0; - case TGSI_OPCODE_BGNSUB: - *copcode = C_OPCODE_BGNSUB; - return 0; - case TGSI_OPCODE_ENDLOOP: - *copcode = C_OPCODE_ENDLOOP; - return 0; - case TGSI_OPCODE_ENDSUB: - *copcode = C_OPCODE_ENDSUB; - return 0; - case TGSI_OPCODE_NOP: - *copcode = C_OPCODE_NOP; - return 0; - case TGSI_OPCODE_NRM4: - *copcode = C_OPCODE_NRM4; - return 0; - case TGSI_OPCODE_CALLNZ: - *copcode = C_OPCODE_CALLNZ; - return 0; - case TGSI_OPCODE_IFC: - *copcode = C_OPCODE_IFC; - return 0; - case TGSI_OPCODE_BREAKC: - *copcode = C_OPCODE_BREAKC; - return 0; - case TGSI_OPCODE_KIL: - *copcode = C_OPCODE_KIL; - return 0; - case TGSI_OPCODE_F2I: - *copcode = C_OPCODE_F2I; - return 0; - case TGSI_OPCODE_IDIV: - *copcode = C_OPCODE_IDIV; - return 0; - case TGSI_OPCODE_IMAX: - *copcode = C_OPCODE_IMAX; - return 0; - case 
TGSI_OPCODE_IMIN: - *copcode = C_OPCODE_IMIN; - return 0; - case TGSI_OPCODE_INEG: - *copcode = C_OPCODE_INEG; - return 0; - case TGSI_OPCODE_ISGE: - *copcode = C_OPCODE_ISGE; - return 0; - case TGSI_OPCODE_ISHR: - *copcode = C_OPCODE_ISHR; - return 0; - case TGSI_OPCODE_ISLT: - *copcode = C_OPCODE_ISLT; - return 0; - case TGSI_OPCODE_F2U: - *copcode = C_OPCODE_F2U; - return 0; - case TGSI_OPCODE_U2F: - *copcode = C_OPCODE_U2F; - return 0; - case TGSI_OPCODE_UADD: - *copcode = C_OPCODE_UADD; - return 0; - case TGSI_OPCODE_UDIV: - *copcode = C_OPCODE_UDIV; - return 0; - case TGSI_OPCODE_UMAD: - *copcode = C_OPCODE_UMAD; - return 0; - case TGSI_OPCODE_UMAX: - *copcode = C_OPCODE_UMAX; - return 0; - case TGSI_OPCODE_UMIN: - *copcode = C_OPCODE_UMIN; - return 0; - case TGSI_OPCODE_UMOD: - *copcode = C_OPCODE_UMOD; - return 0; - case TGSI_OPCODE_UMUL: - *copcode = C_OPCODE_UMUL; - return 0; - case TGSI_OPCODE_USEQ: - *copcode = C_OPCODE_USEQ; - return 0; - case TGSI_OPCODE_USGE: - *copcode = C_OPCODE_USGE; - return 0; - case TGSI_OPCODE_USHR: - *copcode = C_OPCODE_USHR; - return 0; - case TGSI_OPCODE_USLT: - *copcode = C_OPCODE_USLT; - return 0; - case TGSI_OPCODE_USNE: - *copcode = C_OPCODE_USNE; - return 0; - case TGSI_OPCODE_SWITCH: - *copcode = C_OPCODE_SWITCH; - return 0; - case TGSI_OPCODE_CASE: - *copcode = C_OPCODE_CASE; - return 0; - case TGSI_OPCODE_DEFAULT: - *copcode = C_OPCODE_DEFAULT; - return 0; - case TGSI_OPCODE_ENDSWITCH: - *copcode = C_OPCODE_ENDSWITCH; - return 0; - default: - fprintf(stderr, "%s:%d unsupported opcode %d\n", __func__, __LINE__, opcode); - return -EINVAL; - } -} diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index 05575b5767..3c5195f79e 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -55,7 +55,7 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, */ if (!dc) radeon_ctx_dump_bof(rctx->ctx, "gallium.bof"); -#if 0 +#if 1 
radeon_ctx_submit(rctx->ctx); #endif rctx->ctx = radeon_ctx_decref(rctx->ctx); diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index f27ff58ed4..669aaec0b2 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -40,7 +40,6 @@ struct r600_vertex_elements_state }; struct r600_pipe_shader { - unsigned type; struct r600_shader shader; struct radeon_bo *bo; struct radeon_state *state; @@ -92,8 +91,10 @@ struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *rpshader); struct r600_pipe_shader *r600_pipe_shader_create(struct pipe_context *ctx, - unsigned type, const struct tgsi_token *tokens); int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rpshader); +#define R600_ERR(fmt, args...) \ + fprintf(stderr, "EE %s/%s:%d - "fmt, __FILE__, __func__, __LINE__, ##args) + #endif diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index f7d6e10663..4a6cf40c26 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -19,40 +19,112 @@ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Jerome Glisse */ -#include -#include -#include -#include -#include -#include +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" +#include "util/u_format.h" #include "r600_screen.h" #include "r600_context.h" +#include "r600_shader.h" +#include "r600_asm.h" +#include "r600_sq.h" #include "r600d.h" +#include +#include + +static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); + +static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader) +{ + struct r600_context *rctx = r600_context(ctx); + const struct util_format_description *desc; + enum pipe_format resource_format[160]; + unsigned i, nresources = 0; + struct r600_bc *bc = &shader->bc; + struct r600_bc_cf *cf; + struct r600_bc_vtx *vtx; + + if (shader->processor_type != TGSI_PROCESSOR_VERTEX) + return 0; + for (i = 0; i < rctx->vertex_elements->count; i++) { + resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format; + } + LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { + switch (cf->inst) { + case V_SQ_CF_WORD1_SQ_CF_INST_VTX: + case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: + LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { + desc = util_format_description(resource_format[vtx->buffer_id]); + if (desc == NULL) { + R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]); + return -EINVAL; + } + vtx->dst_sel_x = desc->swizzle[0]; + vtx->dst_sel_y = desc->swizzle[1]; + vtx->dst_sel_z = desc->swizzle[2]; + vtx->dst_sel_w = desc->swizzle[3]; + } + break; + default: + break; + } + } + return r600_bc_build(&shader->bc); +} + +struct r600_pipe_shader *r600_pipe_shader_create(struct pipe_context *ctx, + const struct tgsi_token *tokens) +{ + struct r600_screen *rscreen = r600_screen(ctx->screen); + struct r600_pipe_shader *rpshader = CALLOC_STRUCT(r600_pipe_shader); + int r; + +fprintf(stderr, "--------------------------------------------------------------\n"); +tgsi_dump(tokens, 0); + if 
(rpshader == NULL) + return NULL; + rpshader->shader.family = radeon_get_family(rscreen->rw); + r = r600_shader_from_tgsi(tokens, &rpshader->shader); + if (r) { + R600_ERR("translation from TGSI failed !\n"); + goto out_err; + } + r = r600_bc_build(&rpshader->shader.bc); + if (r) { + R600_ERR("building bytecode failed !\n"); + goto out_err; + } +fprintf(stderr, "______________________________________________________________\n"); + return rpshader; +out_err: + free(rpshader); + return NULL; +} static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *rpshader) { struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_shader *rshader = &rpshader->shader; struct radeon_state *state; - unsigned i, tmp; + unsigned i, j, tmp; rpshader->state = radeon_state_decref(rpshader->state); state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER); if (state == NULL) return -ENOMEM; - for (i = 0; i < rshader->noutput; i += 4) { - tmp = rshader->output[i].sid; - tmp |= rshader->output[i + 1].sid << 8; - tmp |= rshader->output[i + 2].sid << 16; - tmp |= rshader->output[i + 3].sid << 24; - state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] = tmp; - } - state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 1); - state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->ngpr); + for (i = 0; i < 10; i++) { + state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0; + } + for (i = 0, j = 0; i < rshader->noutput; i++) { + if (rshader->output[i].name != TGSI_SEMANTIC_POSITION) { + tmp = rshader->output[i].sid << ((j & 3) * 8); + state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + j / 4] |= tmp; + j++; + } + } + state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); + state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr); rpshader->state = state; rpshader->state->bo[0] = radeon_bo_incref(rscreen->rw, 
rpshader->bo); rpshader->state->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo); @@ -81,7 +153,7 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) | S_0286CC_PERSP_GRADIENT_ENA(1); state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; - state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->ngpr); + state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr); state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = 0x00000002; rpshader->state = state; rpshader->state->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); @@ -100,21 +172,21 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *r /* copy new shader */ radeon_bo_decref(rscreen->rw, rpshader->bo); rpshader->bo = NULL; - rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->ndw * 4, + rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4, 4096, NULL); if (rpshader->bo == NULL) { return -ENOMEM; } radeon_bo_map(rscreen->rw, rpshader->bo); - memcpy(rpshader->bo->data, rshader->bcode, rshader->ndw * 4); + memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4); radeon_bo_unmap(rscreen->rw, rpshader->bo); /* build state */ rshader->flat_shade = rctx->flat_shade; - switch (rpshader->type) { - case C_PROGRAM_TYPE_VS: + switch (rshader->processor_type) { + case TGSI_PROCESSOR_VERTEX: r = r600_pipe_shader_vs(ctx, rpshader); break; - case C_PROGRAM_TYPE_FS: + case TGSI_PROCESSOR_FRAGMENT: r = r600_pipe_shader_ps(ctx, rpshader); break; default: @@ -124,104 +196,610 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *r return r; } -struct r600_pipe_shader *r600_pipe_shader_create(struct pipe_context *ctx, unsigned type, const struct tgsi_token *tokens) +int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rpshader) { - struct 
r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_pipe_shader *rpshader = CALLOC_STRUCT(r600_pipe_shader); - struct r600_shader *rshader = &rpshader->shader; + struct r600_context *rctx = r600_context(ctx); int r; - enum radeon_family family; if (rpshader == NULL) - return NULL; - rpshader->type = type; - family = radeon_get_family(rscreen->rw); - rshader->r6xx_compile = (family >= CHIP_R600 && family < CHIP_RV770); - LIST_INITHEAD(&rshader->nodes); - fprintf(stderr, "<< %s\n", rshader->r6xx_compile ? "R600" : "R700"); - tgsi_dump(tokens, 0); - fprintf(stderr, "--------------------------------------------------------------\n"); - r = c_shader_from_tgsi(&rshader->cshader, type, tokens); - if (r) { - r600_pipe_shader_destroy(ctx, rpshader); - fprintf(stderr, "ERROR(%s %d)>>\n\n", __func__, __LINE__); - return NULL; + return -EINVAL; + /* there should be enough input */ + if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) { + R600_ERR("%d resources provided, expecting %d\n", + rctx->vertex_elements->count, rpshader->shader.bc.nresource); + return -EINVAL; } - r = r600_shader_insert_fetch(&rshader->cshader); - if (r) { - r600_pipe_shader_destroy(ctx, rpshader); - fprintf(stderr, "ERROR(%s %d)>>\n\n", __func__, __LINE__); - return NULL; + r = r600_shader_update(ctx, &rpshader->shader); + if (r) + return r; + return r600_pipe_shader(ctx, rpshader); +} + +struct r600_shader_tgsi_instruction; + +struct r600_shader_ctx { + struct tgsi_shader_info info; + struct tgsi_parse_context parse; + const struct tgsi_token *tokens; + unsigned type; + unsigned file_offset[TGSI_FILE_COUNT]; + unsigned temp_reg; + struct r600_shader_tgsi_instruction *inst_info; + struct r600_bc *bc; + struct r600_shader *shader; +}; + +struct r600_shader_tgsi_instruction { + unsigned tgsi_opcode; + unsigned is_op3; + unsigned r600_opcode; + int (*process)(struct r600_shader_ctx *ctx); +}; + +static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]; + +static 
int tgsi_is_supported(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; + int j; + + if (i->Instruction.NumDstRegs > 1) { + R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); + return -EINVAL; } - r = c_shader_build_dominator_tree(&rshader->cshader); - if (r) { - r600_pipe_shader_destroy(ctx, rpshader); - fprintf(stderr, "ERROR(%s %d)>>\n\n", __func__, __LINE__); - return NULL; + if (i->Instruction.Saturate) { + R600_ERR("staturate unsupported\n"); + return -EINVAL; } - r = r600_cshader_legalize(&rshader->cshader); - if (r) { - r600_pipe_shader_destroy(ctx, rpshader); - fprintf(stderr, "ERROR(%s %d)>>\n\n", __func__, __LINE__); - return NULL; + if (i->Instruction.Predicate) { + R600_ERR("predicate unsupported\n"); + return -EINVAL; } - c_shader_dump(&rshader->cshader); - r = r700_shader_translate(rshader); - if (r) { - r600_pipe_shader_destroy(ctx, rpshader); - fprintf(stderr, "ERROR(%s %d)>>\n\n", __func__, __LINE__); - return NULL; + if (i->Instruction.Label) { + R600_ERR("label unsupported\n"); + return -EINVAL; } -#if 1 -#if 0 - fprintf(stderr, "--------------------------------------------------------------\n"); - for (int i = 0; i < rshader->ndw; i++) { - fprintf(stderr, "0x%08X\n", rshader->bcode[i]); + if (i->Instruction.Texture) { + R600_ERR("texture unsupported\n"); + return -EINVAL; } -#endif - fprintf(stderr, ">>\n\n"); -#endif - return rpshader; + for (j = 0; j < i->Instruction.NumSrcRegs; j++) { + if (i->Src[j].Register.Indirect || + i->Src[j].Register.Dimension || + i->Src[j].Register.Absolute) { + R600_ERR("unsupported src (indirect|dimension|absolute)\n"); + return -EINVAL; + } + } + for (j = 0; j < i->Instruction.NumDstRegs; j++) { + if (i->Dst[j].Register.Indirect || i->Dst[j].Register.Dimension) { + R600_ERR("unsupported dst (indirect|dimension)\n"); + return -EINVAL; + } + } + return 0; } -void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *rpshader) 
+static int tgsi_declaration(struct r600_shader_ctx *ctx) { - struct r600_screen *rscreen = r600_screen(ctx->screen); + struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; + struct r600_bc_vtx vtx; + unsigned i; + int r; - if (rpshader == NULL) - return; - radeon_bo_decref(rscreen->rw, rpshader->bo); - rpshader->bo = NULL; - r600_shader_cleanup(&rpshader->shader); - FREE(rpshader); + switch (d->Declaration.File) { + case TGSI_FILE_INPUT: + i = ctx->shader->ninput++; + ctx->shader->input[i].name = d->Semantic.Name; + ctx->shader->input[i].sid = d->Semantic.Index; + ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; + if (ctx->type == TGSI_PROCESSOR_VERTEX) { + /* turn input into fetch */ + memset(&vtx, 0, sizeof(struct r600_bc_vtx)); + vtx.inst = 0; + vtx.fetch_type = 0; + vtx.buffer_id = i; + /* register containing the index into the buffer */ + vtx.src_gpr = 0; + vtx.src_sel_x = 0; + vtx.mega_fetch_count = 0x1F; + vtx.dst_gpr = ctx->shader->input[i].gpr; + vtx.dst_sel_x = 0; + vtx.dst_sel_y = 1; + vtx.dst_sel_z = 2; + vtx.dst_sel_w = 3; + r = r600_bc_add_vtx(ctx->bc, &vtx); + if (r) + return r; + } + break; + case TGSI_FILE_OUTPUT: + i = ctx->shader->noutput++; + ctx->shader->output[i].name = d->Semantic.Name; + ctx->shader->output[i].sid = d->Semantic.Index; + ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; + break; + case TGSI_FILE_CONSTANT: + case TGSI_FILE_TEMPORARY: + break; + default: + R600_ERR("unsupported file %d declaration\n", d->Declaration.File); + return -EINVAL; + } + return 0; } -int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rpshader) +int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) { - struct r600_context *rctx = r600_context(ctx); - struct r600_shader *rshader; - enum pipe_format resource_format[160]; - unsigned i, nresources = 0; - int r; + struct tgsi_full_immediate *immediate; + struct r600_shader_ctx ctx; + struct 
r600_bc_output output; + unsigned opcode; + int i, r = 0, pos0; + u32 value[4]; - if (rpshader == NULL) - return -EINVAL; - rshader = &rpshader->shader; - switch (rpshader->type) { - case C_PROGRAM_TYPE_VS: - for (i = 0; i < rctx->vertex_elements->count; i++) { - resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format; + ctx.bc = &shader->bc; + ctx.shader = shader; + r = r600_bc_init(ctx.bc, shader->family); + if (r) + return r; + ctx.tokens = tokens; + tgsi_scan_shader(tokens, &ctx.info); + tgsi_parse_init(&ctx.parse, tokens); + ctx.type = ctx.parse.FullHeader.Processor.Processor; + shader->processor_type = ctx.type; + + /* register allocations */ + /* Values [0,127] correspond to GPR[0..127]. + * Values [256,511] correspond to cfile constants c[0..255]. + * Other special values are shown in the list below. + * 248 SQ_ALU_SRC_0: special constant 0.0. + * 249 SQ_ALU_SRC_1: special constant 1.0 float. + * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. + * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. + * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. + * 253 SQ_ALU_SRC_LITERAL: literal constant. + * 254 SQ_ALU_SRC_PV: previous vector result. + * 255 SQ_ALU_SRC_PS: previous scalar result. 
+ */ + for (i = 0; i < TGSI_FILE_COUNT; i++) { + ctx.file_offset[i] = 0; + } + if (ctx.type == TGSI_PROCESSOR_VERTEX) { + ctx.file_offset[TGSI_FILE_INPUT] = 1; + } + ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + + ctx.info.file_count[TGSI_FILE_INPUT]; + ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + + ctx.info.file_count[TGSI_FILE_OUTPUT]; + ctx.file_offset[TGSI_FILE_CONSTANT] = 256; + ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253; + ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + + ctx.info.file_count[TGSI_FILE_TEMPORARY]; + + while (!tgsi_parse_end_of_tokens(&ctx.parse)) { + tgsi_parse_token(&ctx.parse); + switch (ctx.parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: +// R600_ERR("TGSI_TOKEN_TYPE_IMMEDIATE unsupported\n"); + immediate = &ctx.parse.FullToken.FullImmediate; + value[0] = immediate->u[0].Uint; + value[1] = immediate->u[1].Uint; + value[2] = immediate->u[2].Uint; + value[3] = immediate->u[3].Uint; + break; + case TGSI_TOKEN_TYPE_DECLARATION: + r = tgsi_declaration(&ctx); + if (r) + goto out_err; + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + r = tgsi_is_supported(&ctx); + if (r) + goto out_err; + opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; + ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; + r = ctx.inst_info->process(&ctx); + if (r) + goto out_err; + r = r600_bc_add_literal(ctx.bc, value); + if (r) + goto out_err; + break; + default: + R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); + r = -EINVAL; + goto out_err; + } + } + /* export output */ + for (i = 0, pos0 = 0; i < shader->noutput; i++) { + memset(&output, 0, sizeof(struct r600_bc_output)); + output.gpr = shader->output[i].gpr; + output.elem_size = 3; + output.swizzle_x = 0; + output.swizzle_y = 1; + output.swizzle_z = 2; + output.swizzle_w = 3; + output.barrier = 1; + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; + output.array_base = i - pos0; + output.inst = 
V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE; + switch (ctx.type == TGSI_PROCESSOR_VERTEX) { + case TGSI_PROCESSOR_VERTEX: + if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { + output.array_base = 60; + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + /* position doesn't count in array_base */ + pos0 = 1; + } + break; + case TGSI_PROCESSOR_FRAGMENT: + if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { + output.array_base = 0; + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + } else { + R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); + r = -EINVAL; + goto out_err; + } + break; + default: + R600_ERR("unsupported processor type %d\n", ctx.type); + r = -EINVAL; + goto out_err; } + if (i == (shader->noutput - 1)) { + output.end_of_program = 1; + } + r = r600_bc_add_output(ctx.bc, &output); + if (r) + goto out_err; + } + tgsi_parse_free(&ctx.parse); + return 0; +out_err: + tgsi_parse_free(&ctx.parse); + return r; +} + +static int tgsi_unsupported(struct r600_shader_ctx *ctx) +{ + R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode); + return -EINVAL; +} + +static int tgsi_end(struct r600_shader_ctx *ctx) +{ + return 0; +} + +static int tgsi_src(struct r600_shader_ctx *ctx, + const struct tgsi_full_src_register *tgsi_src, + unsigned swizzle, + struct r600_bc_alu_src *r600_src) +{ + r600_src->sel = tgsi_src->Register.Index; + if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { + r600_src->sel = 0; + } + r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; + switch (swizzle) { + case 0: + r600_src->chan = tgsi_src->Register.SwizzleX; break; - default: + case 1: + r600_src->chan = tgsi_src->Register.SwizzleY; break; - } - /* there should be enough input */ - if (nresources < rshader->nresource) + case 2: + r600_src->chan = tgsi_src->Register.SwizzleZ; + break; + case 3: + r600_src->chan = tgsi_src->Register.SwizzleW; + break; + default: return -EINVAL; - /* FIXME compare resources */ - r = 
r600_shader_update(rshader, resource_format); - if (r) - return r; - return r600_pipe_shader(ctx, rpshader); + } + return 0; +} + +static int tgsi_dst(struct r600_shader_ctx *ctx, + const struct tgsi_full_dst_register *tgsi_dst, + unsigned swizzle, + struct r600_bc_alu_dst *r600_dst) +{ + r600_dst->sel = tgsi_dst->Register.Index; + r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; + r600_dst->chan = swizzle; + r600_dst->write = 1; + return 0; +} + +static int tgsi_op2(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, r; + + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; + } else { + alu.inst = ctx->inst_info->r600_opcode; + for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { + r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]); + if (r) + return r; + } + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + } + /* handle some special cases */ + switch (ctx->inst_info->tgsi_opcode) { + case TGSI_OPCODE_SUB: + alu.src[1].neg = 1; + break; + case TGSI_OPCODE_DP2: + if (i > 1) { + alu.src[0].sel = alu.src[1].sel = 248; + alu.src[0].chan = alu.src[1].chan = 0; + } + break; + case TGSI_OPCODE_DP3: + if (i > 2) { + alu.src[0].sel = alu.src[1].sel = 248; + alu.src[0].chan = alu.src[1].chan = 0; + } + break; + default: + break; + } + if (i == 3) { + alu.last = 1; + } + alu.nliteral = 0; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + +static int tgsi_slt(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, r; + + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; + } else { + 
alu.inst = ctx->inst_info->r600_opcode; + r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]); + if (r) + return r; + r = tgsi_src(ctx, &inst->Src[1], i, &alu.src[0]); + if (r) + return r; + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + } + if (i == 3) { + alu.last = 1; + } + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; } + +static int tgsi_op3(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, r; + + /* do it in 2 step as op3 doesn't support writemask */ + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = ctx->inst_info->r600_opcode; + for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { + r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]); + if (r) + return r; + } + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + alu.is_op3 = 1; + if (i == 3) { + alu.last = 1; + } + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; + } else { + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = i; + } + if (i == 3) { + alu.last = 1; + } + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + +static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { + {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, + {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_EXP, 0, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, + {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, + {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_op2}, + {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_op2}, + {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, + {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_slt}, + {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, + {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, + {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_unsupported}, + /* gap */ + {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DDX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DDY, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* predicated kill */ + {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TEX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 
+ {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* SGN */ + {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_op2}, + {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BRK, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_unsupported}, + {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CONT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {108, 0, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* conditional kill */ + {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ + /* gap */ + {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_unsupported}, + {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, +}; diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 40064ba8a9..23b6a83b9a 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -23,243 +23,23 @@ #ifndef R600_SHADER_H #define R600_SHADER_H -#include "r600_compiler.h" -#include "radeon.h" - -struct r600_shader_operand { - struct c_vector *vector; - unsigned sel; - unsigned chan; - unsigned neg; - unsigned abs; -}; - -struct r600_shader_vfetch { - struct list_head head; - unsigned cf_addr; - struct r600_shader_operand src[2]; - struct r600_shader_operand dst[4]; -}; - -struct r600_shader_inst { - unsigned is_op3; - unsigned opcode; - unsigned inst; - struct r600_shader_operand src[3]; - struct r600_shader_operand dst; - unsigned last; -}; - -struct r600_shader_alu { - struct list_head head; - unsigned nalu; - unsigned nliteral; - unsigned nconstant; - struct r600_shader_inst alu[5]; - u32 literal[4]; -}; - -struct r600_shader_node { - struct list_head head; - 
unsigned cf_id; /**< cf index (in dw) in byte code */ - unsigned cf_addr; /**< instructions index (in dw) in byte code */ - unsigned nslot; /**< number of slot (2 dw) needed by this node */ - unsigned nfetch; - struct c_node *node; /**< compiler node from which this node originate */ - struct list_head vfetch; /**< list of vfetch instructions */ - struct list_head alu; /**< list of alu instructions */ -}; +#include "r600_asm.h" struct r600_shader_io { - unsigned name; - unsigned gpr; - int sid; + unsigned name; + unsigned gpr; + int sid; }; struct r600_shader { - unsigned stack_size; /**< stack size needed by this shader */ - unsigned ngpr; /**< number of GPR needed by this shader */ - unsigned nconstant; /**< number of constants used by this shader */ - unsigned nresource; /**< number of resources used by this shader */ - unsigned noutput; - unsigned ninput; - unsigned nvector; - unsigned ncf; /**< total number of cf clauses */ - unsigned nslot; /**< total number of slots (2 dw) */ - unsigned flat_shade; /**< are we flat shading */ - struct list_head nodes; /**< list of node */ - struct r600_shader_io input[32]; - struct r600_shader_io output[32]; - /* TODO replace GPR by some better register allocator */ - struct c_vector **gpr; - unsigned ndw; /**< bytes code size in dw */ - u32 *bcode; /**< bytes code */ - enum pipe_format resource_format[160]; /**< format of resource */ - struct c_shader cshader; - boolean r6xx_compile; + unsigned processor_type; + struct r600_bc bc; + boolean flat_shade; + unsigned ninput; + unsigned noutput; + struct r600_shader_io input[32]; + struct r600_shader_io output[32]; + enum radeon_family family; }; -void r600_shader_cleanup(struct r600_shader *rshader); -int r600_shader_register(struct r600_shader *rshader); -int r600_shader_node(struct r600_shader *shader); -void r600_shader_node_place(struct r600_shader *rshader); -int r600_shader_find_gpr(struct r600_shader *rshader, struct c_vector *v, unsigned swizzle, - struct 
r600_shader_operand *operand); -int r600_shader_vfetch_bytecode(struct r600_shader *rshader, - struct r600_shader_node *rnode, - struct r600_shader_vfetch *vfetch, - unsigned *cid); -int r600_shader_update(struct r600_shader *rshader, - enum pipe_format *resource_format); -int r600_shader_legalize(struct r600_shader *rshader); -int r600_cshader_legalize(struct c_shader *shader); - -int r700_shader_translate(struct r600_shader *rshader); - -int c_shader_from_tgsi(struct c_shader *shader, unsigned type, - const struct tgsi_token *tokens); -int r600_shader_register(struct r600_shader *rshader); -int r600_shader_translate_rec(struct r600_shader *rshader, struct c_node *node); -int r700_shader_translate(struct r600_shader *rshader); -int r600_shader_insert_fetch(struct c_shader *shader); - -int r6xx_shader_alu_translate(struct r600_shader *rshader, - struct r600_shader_node *rnode, - unsigned *cid); - -enum r600_instruction { - INST_ADD = 0, - INST_MUL = 1, - INST_MUL_IEEE = 2, - INST_MAX = 3, - INST_MIN = 4, - INST_MAX_DX10 = 5, - INST_MIN_DX10 = 6, - INST_SETE = 7, - INST_SETGT = 8, - INST_SETGE = 9, - INST_SETNE = 10, - INST_SETE_DX10 = 11, - INST_SETGT_DX10 = 12, - INST_SETGE_DX10 = 13, - INST_SETNE_DX10 = 14, - INST_FRACT = 15, - INST_TRUNC = 16, - INST_CEIL = 17, - INST_RNDNE = 18, - INST_FLOOR = 19, - INST_MOVA = 20, - INST_MOVA_FLOOR = 21, - INST_MOVA_INT = 22, - INST_MOV = 23, - INST_NOP = 24, - INST_PRED_SETGT_UINT = 25, - INST_PRED_SETGE_UINT = 26, - INST_PRED_SETE = 27, - INST_PRED_SETGT = 28, - INST_PRED_SETGE = 29, - INST_PRED_SETNE = 30, - INST_PRED_SET_INV = 31, - INST_PRED_SET_POP = 32, - INST_PRED_SET_CLR = 33, - INST_PRED_SET_RESTORE = 34, - INST_PRED_SETE_PUSH = 35, - INST_PRED_SETGT_PUSH = 36, - INST_PRED_SETGE_PUSH = 37, - INST_PRED_SETNE_PUSH = 38, - INST_KILLE = 39, - INST_KILLGT = 40, - INST_KILLGE = 41, - INST_KILLNE = 42, - INST_AND_INT = 43, - INST_OR_INT = 44, - INST_XOR_INT = 45, - INST_NOT_INT = 46, - INST_ADD_INT = 47, - INST_SUB_INT = 
48, - INST_MAX_INT = 49, - INST_MIN_INT = 50, - INST_MAX_UINT = 51, - INST_MIN_UINT = 52, - INST_SETE_INT = 53, - INST_SETGT_INT = 54, - INST_SETGE_INT = 55, - INST_SETNE_INT = 56, - INST_SETGT_UINT = 57, - INST_SETGE_UINT = 58, - INST_KILLGT_UINT = 59, - INST_KILLGE_UINT = 60, - INST_PRED_SETE_INT = 61, - INST_PRED_SETGT_INT = 62, - INST_PRED_SETGE_INT = 63, - INST_PRED_SETNE_INT = 64, - INST_KILLE_INT = 65, - INST_KILLGT_INT = 66, - INST_KILLGE_INT = 67, - INST_KILLNE_INT = 68, - INST_PRED_SETE_PUSH_INT = 69, - INST_PRED_SETGT_PUSH_INT = 70, - INST_PRED_SETGE_PUSH_INT = 71, - INST_PRED_SETNE_PUSH_INT = 72, - INST_PRED_SETLT_PUSH_INT = 73, - INST_PRED_SETLE_PUSH_INT = 74, - INST_DOT4 = 75, - INST_DOT4_IEEE = 76, - INST_CUBE = 77, - INST_MAX4 = 78, - INST_MOVA_GPR_INT = 79, - INST_EXP_IEEE = 80, - INST_LOG_CLAMPED = 81, - INST_LOG_IEEE = 82, - INST_RECIP_CLAMPED = 83, - INST_RECIP_FF = 84, - INST_RECIP_IEEE = 85, - INST_RECIPSQRT_CLAMPED = 86, - INST_RECIPSQRT_FF = 87, - INST_RECIPSQRT_IEEE = 88, - INST_SQRT_IEEE = 89, - INST_FLT_TO_INT = 90, - INST_INT_TO_FLT = 91, - INST_UINT_TO_FLT = 92, - INST_SIN = 93, - INST_COS = 94, - INST_ASHR_INT = 95, - INST_LSHR_INT = 96, - INST_LSHL_INT = 97, - INST_MULLO_INT = 98, - INST_MULHI_INT = 99, - INST_MULLO_UINT = 100, - INST_MULHI_UINT = 101, - INST_RECIP_INT = 102, - INST_RECIP_UINT = 103, - INST_FLT_TO_UINT = 104, - INST_MUL_LIT = 105, - INST_MUL_LIT_M2 = 106, - INST_MUL_LIT_M4 = 107, - INST_MUL_LIT_D2 = 108, - INST_MULADD = 109, - INST_MULADD_M2 = 110, - INST_MULADD_M4 = 111, - INST_MULADD_D2 = 112, - INST_MULADD_IEEE = 113, - INST_MULADD_IEEE_M2 = 114, - INST_MULADD_IEEE_M4 = 115, - INST_MULADD_IEEE_D2 = 116, - INST_CNDE = 117, - INST_CNDGT = 118, - INST_CNDGE = 119, - INST_CNDE_INT = 120, - INST_CNDGT_INT = 121, - INST_CNDGE_INT = 122, - INST_COUNT -}; - -struct r600_instruction_info { - enum r600_instruction instruction; - unsigned opcode; - unsigned is_trans; - unsigned is_op3; -}; - - #endif diff --git 
a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h index 447ba98f00..4770ab0bf7 100644 --- a/src/gallium/drivers/r600/r600_sq.h +++ b/src/gallium/drivers/r600/r600_sq.h @@ -87,9 +87,9 @@ #define G_SQ_CF_WORD1_BARRIER(x) (((x) >> 31) & 0x1) #define C_SQ_CF_WORD1_BARRIER 0x7FFFFFFF #define P_SQ_CF_ALU_WORD0 -#define S_SQ_CF_ALU_WORD0_ALU_ADDR(x) (((x) & 0x3FFFFF) << 0) -#define G_SQ_CF_ALU_WORD0_ALU_ADDR(x) (((x) >> 0) & 0x3FFFFF) -#define C_SQ_CF_ALU_WORD0_ALU_ADDR 0xFFC00000 +#define S_SQ_CF_ALU_WORD0_ADDR(x) (((x) & 0x3FFFFF) << 0) +#define G_SQ_CF_ALU_WORD0_ADDR(x) (((x) >> 0) & 0x3FFFFF) +#define C_SQ_CF_ALU_WORD0_ADDR 0xFFC00000 #define S_SQ_CF_ALU_WORD0_KCACHE_BANK0(x) (((x) & 0xF) << 22) #define G_SQ_CF_ALU_WORD0_KCACHE_BANK0(x) (((x) >> 22) & 0xF) #define C_SQ_CF_ALU_WORD0_KCACHE_BANK0 0xFC3FFFFF @@ -109,15 +109,15 @@ #define S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(x) (((x) & 0xFF) << 10) #define G_SQ_CF_ALU_WORD1_KCACHE_ADDR1(x) (((x) >> 10) & 0xFF) #define C_SQ_CF_ALU_WORD1_KCACHE_ADDR1 0xFFFC03FF -#define S_SQ_CF_ALU_WORD1_ALU_COUNT(x) (((x) & 0x7F) << 18) -#define G_SQ_CF_ALU_WORD1_ALU_COUNT(x) (((x) >> 18) & 0x7F) -#define C_SQ_CF_ALU_WORD1_ALU_COUNT 0xFE03FFFF +#define S_SQ_CF_ALU_WORD1_COUNT(x) (((x) & 0x7F) << 18) +#define G_SQ_CF_ALU_WORD1_COUNT(x) (((x) >> 18) & 0x7F) +#define C_SQ_CF_ALU_WORD1_COUNT 0xFE03FFFF #define S_SQ_CF_ALU_WORD1_USES_WATERFALL(x) (((x) & 0x1) << 25) #define G_SQ_CF_ALU_WORD1_USES_WATERFALL(x) (((x) >> 25) & 0x1) #define C_SQ_CF_ALU_WORD1_USES_WATERFALL 0xFDFFFFFF -#define S_SQ_CF_ALU_WORD1_CF_ALU_INST(x) (((x) & 0xF) << 26) -#define G_SQ_CF_ALU_WORD1_CF_ALU_INST(x) (((x) >> 26) & 0xF) -#define C_SQ_CF_ALU_WORD1_CF_ALU_INST 0xC3FFFFFF +#define S_SQ_CF_ALU_WORD1_CF_INST(x) (((x) & 0xF) << 26) +#define G_SQ_CF_ALU_WORD1_CF_INST(x) (((x) >> 26) & 0xF) +#define C_SQ_CF_ALU_WORD1_CF_INST 0xC3FFFFFF #define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU 0x00000008 #define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE 0x00000009 
#define V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER 0x0000000A diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 4150f88785..84a13e4ef7 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -151,7 +151,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, static void *r600_create_fs_state(struct pipe_context *ctx, const struct pipe_shader_state *shader) { - return r600_pipe_shader_create(ctx, C_PROGRAM_TYPE_FS, shader->tokens); + return r600_pipe_shader_create(ctx, shader->tokens); } static void r600_bind_fs_state(struct pipe_context *ctx, void *state) @@ -164,7 +164,7 @@ static void r600_bind_fs_state(struct pipe_context *ctx, void *state) static void *r600_create_vs_state(struct pipe_context *ctx, const struct pipe_shader_state *shader) { - return r600_pipe_shader_create(ctx, C_PROGRAM_TYPE_VS, shader->tokens); + return r600_pipe_shader_create(ctx, shader->tokens); } static void r600_bind_vs_state(struct pipe_context *ctx, void *state) diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c new file mode 100644 index 0000000000..3532ba5b0c --- /dev/null +++ b/src/gallium/drivers/r600/r700_asm.c @@ -0,0 +1,70 @@ +/* + * Copyright 2010 Jerome Glisse + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "r600_asm.h" +#include "r600_context.h" +#include "util/u_memory.h" +#include "r700_sq.h" +#include +#include + +int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) +{ + unsigned i; + + /* don't replace gpr by pv or ps for destination register */ + if (alu->is_op3) { + bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | + S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | + S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | + S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | + S_SQ_ALU_WORD0_LAST(alu->last); + bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | + S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | + S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) | + S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) | + S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) | + S_SQ_ALU_WORD1_OP3_ALU_INST(alu->inst) | + S_SQ_ALU_WORD1_BANK_SWIZZLE(0); + } else { + bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | + S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | + S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) | + S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | + S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | + S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) | + S_SQ_ALU_WORD0_LAST(alu->last); + bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | + S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | + S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | + S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | + S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | + 
S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) | + S_SQ_ALU_WORD1_BANK_SWIZZLE(0); + } + if (alu->last) { + for (i = 0; i < alu->nliteral; i++) { + bc->bytecode[id++] = alu->value[i]; + } + } + return 0; +} -- cgit v1.2.3 From 5cc2974dff346f3fa53881dbcc158e4563915487 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 23 Jul 2010 17:49:26 -0400 Subject: r600g: add RSQ token support Could serve as an example on how to add more token support. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_shader.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 4a6cf40c26..e983cc90b4 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -559,7 +559,6 @@ static int tgsi_op2(struct r600_shader_ctx *ctx) if (i == 3) { alu.last = 1; } - alu.nliteral = 0; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; @@ -599,6 +598,33 @@ static int tgsi_slt(struct r600_shader_ctx *ctx) return 0; } +static int tgsi_trans(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, r; + + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + if (inst->Dst[0].Register.WriteMask & (1 << i)) { + alu.inst = ctx->inst_info->r600_opcode; + for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { + r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]); + if (r) + return r; + } + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } + return 0; +} + static int tgsi_op3(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -651,7 +677,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_MOV, 0, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans}, {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, -- cgit v1.2.3 From cf864fd58b2a4780482a108cd3ff86779e8fa965 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 23 Jul 2010 18:19:13 -0400 Subject: r600g: fix dp2, dp3, dp4 tokens We need to make sure dp are all mirrored across the alu unit. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_shader.c | 87 +++++++++++++++++++++++++--------- 1 file changed, 64 insertions(+), 23 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index e983cc90b4..d788ab88be 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -541,18 +541,6 @@ static int tgsi_op2(struct r600_shader_ctx *ctx) case TGSI_OPCODE_SUB: alu.src[1].neg = 1; break; - case TGSI_OPCODE_DP2: - if (i > 1) { - alu.src[0].sel = alu.src[1].sel = 248; - alu.src[0].chan = alu.src[1].chan = 0; - } - break; - case TGSI_OPCODE_DP3: - if (i > 2) { - alu.src[0].sel = alu.src[1].sel = 248; - alu.src[0].chan = alu.src[1].chan = 0; - } - break; default: break; } @@ -625,6 +613,33 @@ static int tgsi_trans(struct r600_shader_ctx *ctx) return 0; } +static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) +{ + struct r600_bc_alu alu; + int i, r; + + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { + 
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; + } else { + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = i; + } + if (i == 3) { + alu.last = 1; + } + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + static int tgsi_op3(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -642,6 +657,7 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) } alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; + alu.dst.write = 1; alu.is_op3 = 1; if (i == 3) { alu.last = 1; @@ -650,17 +666,42 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) if (r) return r; } + return tgsi_helper_copy(ctx, inst); +} + +static int tgsi_dp(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, r; + for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); - if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; - } else { - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + alu.inst = ctx->inst_info->r600_opcode; + for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { + r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]); if (r) return r; - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = i; + } + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + alu.dst.write = 1; + /* handle some special cases */ + switch (ctx->inst_info->tgsi_opcode) { + case TGSI_OPCODE_DP2: + if (i > 1) { + alu.src[0].sel = alu.src[1].sel = 248; + alu.src[0].chan = alu.src[1].chan = 0; + } + break; + case TGSI_OPCODE_DP3: + if (i > 2) { + alu.src[0].sel = alu.src[1].sel = 248; + alu.src[0].chan = alu.src[1].chan = 0; + } + break; + default: + break; } if (i == 3) { alu.last = 1; @@ -669,7 +710,7 @@ static int tgsi_op3(struct 
r600_shader_ctx *ctx) if (r) return r; } - return 0; + return tgsi_helper_copy(ctx, inst); } static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { @@ -682,8 +723,8 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, - {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_op2}, - {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_op2}, + {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, + {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, @@ -747,7 +788,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_TXB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_op2}, + {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_BRK, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, -- cgit v1.2.3 From 1874cb7e82a566079219a571d6a30a74581c611e Mon Sep 17 00:00:00 2001 From: Christopher James Halse Rogers Date: Fri, 11 Jun 2010 20:33:44 +1000 Subject: gallium: Fix build with llvm installed in non-standard location The es1, es2 and gl state trackers include draw_pipe.h, which includes the llvm headers 
if MESA_LLVM is true, so we also need to add the llvm search paths. Similarly, gallivm and other gallium drivers need LLVM_CFLAGS to build when enabled. Also fix xorg drivers, they didn't include LDFLAGS. --- src/gallium/Makefile.template | 4 ++++ src/gallium/targets/Makefile.xorg | 2 +- src/mesa/Makefile | 7 +++++++ 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template index 43203b1756..bff399ec64 100644 --- a/src/gallium/Makefile.template +++ b/src/gallium/Makefile.template @@ -23,6 +23,10 @@ INCLUDES = \ -I$(TOP)/src/gallium/drivers \ $(LIBRARY_INCLUDES) +ifeq ($(MESA_LLVM),1) +LIBRARY_DEFINES += $(LLVM_CFLAGS) +endif + ##### TARGETS ##### diff --git a/src/gallium/targets/Makefile.xorg b/src/gallium/targets/Makefile.xorg index c2d0064978..762c905985 100644 --- a/src/gallium/targets/Makefile.xorg +++ b/src/gallium/targets/Makefile.xorg @@ -42,7 +42,7 @@ endif default: depend $(TOP)/$(LIB_DIR)/gallium $(LIBNAME) $(LIBNAME_STAGING) $(LIBNAME): $(OBJECTS) Makefile ../Makefile.xorg $(LIBS) $(DRIVER_PIPES) - $(MKLIB) -noprefix -o $@ $(OBJECTS) $(DRIVER_PIPES) $(GALLIUM_AUXILIARIES) $(DRIVER_LINKS) + $(MKLIB) -noprefix -o $@ $(LDFLAGS) $(OBJECTS) $(DRIVER_PIPES) $(GALLIUM_AUXILIARIES) $(DRIVER_LINKS) depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS) $(GENERATED_SOURCES) rm -f depend diff --git a/src/mesa/Makefile b/src/mesa/Makefile index 3e0f010671..7073c92240 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -20,6 +20,13 @@ MESA_CPPFLAGS := $(API_DEFINES) ES1_CPPFLAGS := -DFEATURE_ES1=1 ES2_CPPFLAGS := -DFEATURE_ES2=1 +ifeq ($(MESA_LLVM),1) +MESA_CPPFLAGS += $(LLVM_CFLAGS) +ES1_CPPFLAGS += $(LLVM_CFLAGS) +ES2_CPPFLAGS += $(LLVM_CFLAGS) +endif + + include sources.mak # adjust object dirs -- cgit v1.2.3 From bd27db400a433d842b5ede862e14136a1e2d8119 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Fri, 23 Jul 2010 17:50:35 -0700 Subject: r600g: Fix SCons 
build. --- src/gallium/drivers/r600/SConscript | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript index 26e2f1941c..99c8644e02 100644 --- a/src/gallium/drivers/r600/SConscript +++ b/src/gallium/drivers/r600/SConscript @@ -27,11 +27,8 @@ r600 = env.ConvenienceLibrary( 'r600_state.c', 'r600_texture.c', 'r600_shader.c', - 'r600_compiler.c', - 'r600_compiler_tgsi.c', - 'r600_compiler_dump.c', - 'r600_compiler_r600.c', - 'r600_compiler_r700.c' + 'r600_asm.c', + 'r700_asm.c', ]) Export('r600') -- cgit v1.2.3 From 33241134e6e3d5bf19141eceff90fd854b23386a Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 23 Jul 2010 20:55:48 -0400 Subject: r600g: first pass at texture support This adds texture support to the assembler, generated code is wrong (tested against working dump). Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_asm.c | 91 +++++++++++++++++++++++++++++++-- src/gallium/drivers/r600/r600_asm.h | 29 +++++++++++ src/gallium/drivers/r600/r600_context.c | 2 +- src/gallium/drivers/r600/r600_helper.c | 11 ++-- src/gallium/drivers/r600/r600_shader.c | 31 ++++++++--- src/gallium/drivers/r600/r600_sq.h | 2 + 6 files changed, 152 insertions(+), 14 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 6e48703a57..e678a2fdf2 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -38,6 +38,7 @@ static struct r600_bc_cf *r600_bc_cf(void) LIST_INITHEAD(&cf->list); LIST_INITHEAD(&cf->alu); LIST_INITHEAD(&cf->vtx); + LIST_INITHEAD(&cf->tex); return cf; } @@ -61,6 +62,16 @@ static struct r600_bc_vtx *r600_bc_vtx(void) return vtx; } +static struct r600_bc_tex *r600_bc_tex(void) +{ + struct r600_bc_tex *tex = CALLOC_STRUCT(r600_bc_tex); + + if (tex == NULL) + return NULL; + LIST_INITHEAD(&tex->list); + return tex; +} + int 
r600_bc_init(struct r600_bc *bc, enum radeon_family family) { LIST_INITHEAD(&bc->cf); @@ -149,8 +160,14 @@ int r600_bc_add_literal(struct r600_bc *bc, const u32 *value) { struct r600_bc_alu *alu; - if (bc->cf_last == NULL || - bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) || + if (bc->cf_last == NULL) { + R600_ERR("no last CF\n"); + return -EINVAL; + } + if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) { + return 0; + } + if (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) || LIST_IS_EMPTY(&bc->cf_last->alu)) { R600_ERR("last CF is not ALU (%p)\n", bc->cf_last); return -EINVAL; @@ -186,13 +203,39 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_VTX; } LIST_ADDTAIL(&nvtx->list, &bc->cf_last->vtx); - /* each fetch use 6 dwords */ + /* each fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; return 0; } -int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id) +int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) +{ + struct r600_bc_tex *ntex = r600_bc_tex(); + int r; + + if (ntex == NULL) + return -ENOMEM; + memcpy(ntex, tex, sizeof(struct r600_bc_tex)); + + /* cf can contains only alu or only vtx or only tex */ + if (bc->cf_last == NULL || + bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX) { + r = r600_bc_add_cf(bc); + if (r) { + free(ntex); + return r; + } + bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_TEX; + } + LIST_ADDTAIL(&ntex->list, &bc->cf_last->tex); + /* each texture fetch use 4 dwords */ + bc->cf_last->ndw += 4; + bc->ndw += 4; + return 0; +} + +static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id) { bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | @@ -209,6 +252,35 @@ int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id) return 0; } +static int r600_bc_tex_build(struct r600_bc *bc, struct 
r600_bc_tex *tex, unsigned id) +{ + bc->bytecode[id++] = S_SQ_TEX_WORD0_TEX_INST(tex->inst) | + S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) | + S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) | + S_SQ_TEX_WORD0_SRC_REL(tex->src_rel); + bc->bytecode[id++] = S_SQ_TEX_WORD1_DST_GPR(tex->dst_gpr) | + S_SQ_TEX_WORD1_DST_REL(tex->dst_rel) | + S_SQ_TEX_WORD1_DST_SEL_X(tex->dst_sel_x) | + S_SQ_TEX_WORD1_DST_SEL_Y(tex->dst_sel_y) | + S_SQ_TEX_WORD1_DST_SEL_Z(tex->dst_sel_z) | + S_SQ_TEX_WORD1_DST_SEL_W(tex->dst_sel_w) | + S_SQ_TEX_WORD1_LOD_BIAS(tex->lod_bias) | + S_SQ_TEX_WORD1_COORD_TYPE_X(tex->coord_type_x) | + S_SQ_TEX_WORD1_COORD_TYPE_Y(tex->coord_type_y) | + S_SQ_TEX_WORD1_COORD_TYPE_Z(tex->coord_type_z) | + S_SQ_TEX_WORD1_COORD_TYPE_W(tex->coord_type_w); + bc->bytecode[id++] = S_SQ_TEX_WORD2_OFFSET_X(tex->offset_x) | + S_SQ_TEX_WORD2_OFFSET_Y(tex->offset_y) | + S_SQ_TEX_WORD2_OFFSET_Z(tex->offset_z) | + S_SQ_TEX_WORD2_SAMPLER_ID(tex->sampler_id) | + S_SQ_TEX_WORD2_SRC_SEL_X(tex->src_sel_x) | + S_SQ_TEX_WORD2_SRC_SEL_Y(tex->src_sel_y) | + S_SQ_TEX_WORD2_SRC_SEL_Z(tex->src_sel_z) | + S_SQ_TEX_WORD2_SRC_SEL_W(tex->src_sel_w); + bc->bytecode[id++] = 0; + return 0; +} + int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) { unsigned i; @@ -262,6 +334,7 @@ int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) S_SQ_CF_ALU_WORD1_BARRIER(1) | S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); break; + case V_SQ_CF_WORD1_SQ_CF_INST_TEX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); @@ -295,6 +368,7 @@ int r600_bc_build(struct r600_bc *bc) struct r600_bc_cf *cf; struct r600_bc_alu *alu; struct r600_bc_vtx *vtx; + struct r600_bc_tex *tex; unsigned addr; int r; @@ -306,6 +380,7 @@ int r600_bc_build(struct r600_bc *bc) switch (cf->inst) { case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): break; + case V_SQ_CF_WORD1_SQ_CF_INST_TEX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX: case 
V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: /* fetch node need to be 16 bytes aligned*/ @@ -373,6 +448,14 @@ int r600_bc_build(struct r600_bc *bc) addr += 4; } break; + case V_SQ_CF_WORD1_SQ_CF_INST_TEX: + LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { + r = r600_bc_tex_build(bc, tex, addr); + if (r) + return r; + addr += 4; + } + break; case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: break; diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 8a874a9df5..88fb957440 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -51,6 +51,33 @@ struct r600_bc_alu { u32 value[4]; }; +struct r600_bc_tex { + struct list_head list; + unsigned inst; + unsigned resource_id; + unsigned src_gpr; + unsigned src_rel; + unsigned dst_gpr; + unsigned dst_rel; + unsigned dst_sel_x; + unsigned dst_sel_y; + unsigned dst_sel_z; + unsigned dst_sel_w; + unsigned lod_bias; + unsigned coord_type_x; + unsigned coord_type_y; + unsigned coord_type_z; + unsigned coord_type_w; + unsigned offset_x; + unsigned offset_y; + unsigned offset_z; + unsigned sampler_id; + unsigned src_sel_x; + unsigned src_sel_y; + unsigned src_sel_z; + unsigned src_sel_w; +}; + struct r600_bc_vtx { struct list_head list; unsigned inst; @@ -87,6 +114,7 @@ struct r600_bc_cf { unsigned ndw; unsigned id; struct list_head alu; + struct list_head tex; struct list_head vtx; struct r600_bc_output output; }; @@ -106,6 +134,7 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family); int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu); int r600_bc_add_literal(struct r600_bc *bc, const u32 *value); int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx); +int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex); int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output); int r600_bc_build(struct r600_bc *bc); diff --git 
a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index 3c5195f79e..05575b5767 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -55,7 +55,7 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, */ if (!dc) radeon_ctx_dump_bof(rctx->ctx, "gallium.bof"); -#if 1 +#if 0 radeon_ctx_submit(rctx->ctx); #endif rctx->ctx = radeon_ctx_decref(rctx->ctx); diff --git a/src/gallium/drivers/r600/r600_helper.c b/src/gallium/drivers/r600/r600_helper.c index e3175b627a..7241ab1c17 100644 --- a/src/gallium/drivers/r600/r600_helper.c +++ b/src/gallium/drivers/r600/r600_helper.c @@ -27,6 +27,7 @@ #include #include #include "r600_screen.h" +#include "r600_context.h" #include "r600d.h" int r600_conv_pipe_format(unsigned pformat, unsigned *format) @@ -49,6 +50,12 @@ int r600_conv_pipe_format(unsigned pformat, unsigned *format) case PIPE_FORMAT_R8G8B8A8_SSCALED: *format = V_0280A0_COLOR_8_8_8_8; return 0; + case PIPE_FORMAT_R32_FLOAT: + *format = V_0280A0_COLOR_32_FLOAT; + return 0; + case PIPE_FORMAT_R32G32_FLOAT: + *format = V_0280A0_COLOR_32_32_FLOAT; + return 0; case PIPE_FORMAT_L8_UNORM: case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: @@ -60,8 +67,6 @@ int r600_conv_pipe_format(unsigned pformat, unsigned *format) case PIPE_FORMAT_R64G64_FLOAT: case PIPE_FORMAT_R64G64B64_FLOAT: case PIPE_FORMAT_R64G64B64A64_FLOAT: - case PIPE_FORMAT_R32_FLOAT: - case PIPE_FORMAT_R32G32_FLOAT: case PIPE_FORMAT_R32_UNORM: case PIPE_FORMAT_R32G32_UNORM: case PIPE_FORMAT_R32G32B32_UNORM: @@ -111,7 +116,7 @@ int r600_conv_pipe_format(unsigned pformat, unsigned *format) case PIPE_FORMAT_R32G32B32_FIXED: case PIPE_FORMAT_R32G32B32A32_FIXED: default: - fprintf(stderr, "%s:%d unsupported %d\n", __func__, __LINE__, pformat); + R600_ERR("unsupported %d\n", pformat); return -EINVAL; } } diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index d788ab88be..e865f013f7 
100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -23,6 +23,7 @@ #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_scan.h" +#include "tgsi/tgsi_dump.h" #include "util/u_format.h" #include "r600_screen.h" #include "r600_context.h" @@ -259,10 +260,6 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) R600_ERR("label unsupported\n"); return -EINVAL; } - if (i->Instruction.Texture) { - R600_ERR("texture unsupported\n"); - return -EINVAL; - } for (j = 0; j < i->Instruction.NumSrcRegs; j++) { if (i->Src[j].Register.Indirect || i->Src[j].Register.Dimension || @@ -321,6 +318,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) break; case TGSI_FILE_CONSTANT: case TGSI_FILE_TEMPORARY: + case TGSI_FILE_SAMPLER: break; default: R600_ERR("unsupported file %d declaration\n", d->Declaration.File); @@ -381,7 +379,6 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s tgsi_parse_token(&ctx.parse); switch (ctx.parse.FullToken.Token.Type) { case TGSI_TOKEN_TYPE_IMMEDIATE: -// R600_ERR("TGSI_TOKEN_TYPE_IMMEDIATE unsupported\n"); immediate = &ctx.parse.FullToken.FullImmediate; value[0] = immediate->u[0].Uint; value[1] = immediate->u[1].Uint; @@ -713,6 +710,28 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) return tgsi_helper_copy(ctx, inst); } +static int tgsi_tex(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_tex tex; + + memset(&tex, 0, sizeof(struct r600_bc_tex)); + tex.inst = ctx->inst_info->r600_opcode; + tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; + tex.sampler_id = tex.resource_id; + tex.src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; + tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Src[0].Register.Index; + tex.dst_sel_x = 0; + tex.dst_sel_y = 1; + 
tex.dst_sel_z = 2; + tex.dst_sel_w = 3; + tex.src_sel_x = 0; + tex.src_sel_y = 1; + tex.src_sel_z = 2; + tex.src_sel_w = 3; + return r600_bc_add_tex(ctx->bc, &tex); +} + static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, @@ -771,7 +790,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_TEX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXP, 0, 0x10, tgsi_tex}, {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h index 4770ab0bf7..002660c654 100644 --- a/src/gallium/drivers/r600/r600_sq.h +++ b/src/gallium/drivers/r600/r600_sq.h @@ -546,6 +546,8 @@ #define S_SQ_TEX_WORD1_COORD_TYPE_X(x) (((x) & 0x1) << 28) #define G_SQ_TEX_WORD1_COORD_TYPE_X(x) (((x) >> 28) & 0x1) #define C_SQ_TEX_WORD1_COORD_TYPE_X 0xEFFFFFFF +#define V_SQ_TEX_WORD1_COORD_UNNORMALIZED 0x00000000 +#define V_SQ_TEX_WORD1_COORD_NORMALIZED 0x00000001 #define S_SQ_TEX_WORD1_COORD_TYPE_Y(x) (((x) & 0x1) << 29) #define G_SQ_TEX_WORD1_COORD_TYPE_Y(x) (((x) >> 29) & 0x1) #define C_SQ_TEX_WORD1_COORD_TYPE_Y 0xDFFFFFFF -- cgit v1.2.3 From 80b331c7f6c3724f2044325e0d7d7c79ae5a4510 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Fri, 23 Jul 2010 18:47:21 -0700 Subject: util: Add PIPE_OS_CYGWIN to u_network. 
--- src/gallium/auxiliary/util/u_network.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_network.c b/src/gallium/auxiliary/util/u_network.c index 87ee0e4768..77f2c5fc7d 100644 --- a/src/gallium/auxiliary/util/u_network.c +++ b/src/gallium/auxiliary/util/u_network.c @@ -6,7 +6,7 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) # include # include -#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE) +#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_CYGWIN) # include # include # include -- cgit v1.2.3 From 5603d2e4c4dd64f8d8ba265dde216bf42a9ac945 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Sat, 24 Jul 2010 21:10:45 -0700 Subject: nvfx: Move declaration before code. --- src/gallium/drivers/nvfx/nvfx_fragprog.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c index 6772d9bd51..ee41f03b9b 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragprog.c +++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c @@ -842,7 +842,6 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) struct nouveau_channel* chan = nvfx->screen->base.channel; struct nvfx_fragment_program *fp = nvfx->fragprog; int update = 0; - int i; if (!fp->translated) { @@ -895,6 +894,7 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) { struct nvfx_fragment_program_bo* fpbo = os_malloc_aligned(sizeof(struct nvfx_fragment_program) + fp->prog_size * fp->progs_per_bo, 16); char *map, *buf; + int i; if(fp->fpbo) { @@ -910,7 +910,7 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) map = fpbo->bo->map; buf = fpbo->insn; - for(int i = 0; i < fp->progs_per_bo; ++i) + for(i = 0; i < fp->progs_per_bo; ++i) { memcpy(buf, fp->insn, fp->insn_len * 4); nvfx_fp_memcpy(map, fp->insn, fp->insn_len * 4); @@ -931,6 +931,7 @@ nvfx_fragprog_validate(struct 
nvfx_context *nvfx) uint32_t* map = pipe_buffer_map(&nvfx->pipe, constbuf, PIPE_TRANSFER_READ, &transfer); uint32_t* fpmap = (uint32_t*)((char*)fp->fpbo->bo->map + offset); uint32_t* buf = (uint32_t*)((char*)fp->fpbo->insn + offset); + int i; for (i = 0; i < fp->nr_consts; ++i) { unsigned off = fp->consts[i].offset; unsigned idx = fp->consts[i].index * 4; -- cgit v1.2.3 From 3adb5c7d45f4a38b0fbb66633c808f69082918cc Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 24 Jul 2010 03:07:45 +0200 Subject: r300g: do not align texture height to 2^n for 1D and 2D non-mipmapped textures I don't remember why the alignment was there, but it seems to be no longer needed. I guess it was a dirty fix for some other bug. --- src/gallium/drivers/r300/r300_texture.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 5750bc4329..f7c167d1bf 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -774,7 +774,11 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture* tex, height = align(height, tile_height); /* This is needed for the kernel checker, unfortunately. 
*/ - height = util_next_power_of_two(height); + if ((tex->b.b.target != PIPE_TEXTURE_1D && + tex->b.b.target != PIPE_TEXTURE_2D) || + tex->b.b.last_level != 0) { + height = util_next_power_of_two(height); + } } return util_format_get_nblocksy(tex->b.b.format, height); -- cgit v1.2.3 From 4ce26210842176c4b280b7db85639ced40d4083d Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 24 Jul 2010 03:17:32 +0200 Subject: r300g: cleanup texture debug logging --- src/gallium/drivers/r300/r300_debug.c | 1 + src/gallium/drivers/r300/r300_screen.h | 1 + src/gallium/drivers/r300/r300_texture.c | 41 ++++++++++++++++++--------------- 3 files changed, 25 insertions(+), 18 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 31d4e14681..053a64ea6d 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -38,6 +38,7 @@ static const struct debug_named_value debug_options[] = { { "fall", DBG_FALL, "Fallbacks (for debugging)" }, { "rs", DBG_RS, "Rasterizer (for debugging)" }, { "fb", DBG_FB, "Framebuffer (for debugging)" }, + { "cbzb", DBG_CBZB, "Fast color clear info (for debugging)" }, { "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries (for debugging)" }, { "anisohq", DBG_ANISOHQ, "High quality anisotropic filtering (for benchmarking)" }, { "notiling", DBG_NO_TILING, "Disable tiling (for benchmarking)" }, diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index edc494ff6c..18745b83a0 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -90,6 +90,7 @@ r300_winsys_screen(struct pipe_screen *screen) { #define DBG_FALL (1 << 8) #define DBG_FB (1 << 9) #define DBG_RS_BLOCK (1 << 10) +#define DBG_CBZB (1 << 11) /* Features. 
*/ #define DBG_ANISOHQ (1 << 16) #define DBG_NO_TILING (1 << 17) diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index f7c167d1bf..176fa76920 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -958,6 +958,21 @@ struct u_resource_vtbl r300_texture_vtbl = u_default_transfer_inline_write /* transfer_inline_write */ }; +static void r300_tex_print_info(struct r300_screen *rscreen, struct r300_texture *tex, + const char *func) +{ + fprintf(stderr, + "r300: %s: Macro: %s, Micro: %s, Pitch: %i, Dim: %ix%ix%i, " + "LastLevel: %i, Size: %i, Format: %s\n", + func, + tex->macrotile ? "YES" : " NO", + tex->microtile ? "YES" : " NO", + tex->hwpitch[0], + tex->b.b.width0, tex->b.b.height0, tex->b.b.depth0, + tex->b.b.last_level, tex->size, + util_format_short_name(tex->b.b.format)); +} + /* Create a new texture. */ struct pipe_resource* r300_texture_create(struct pipe_screen* screen, const struct pipe_resource* base) @@ -997,15 +1012,9 @@ struct pipe_resource* r300_texture_create(struct pipe_screen* screen, r300_texture_setup_immutable_state(rscreen, tex); r300_texture_setup_fb_state(rscreen, tex); - SCREEN_DBG(rscreen, DBG_TEX, - "r300: texture_create: Macro: %s, Micro: %s, Pitch: %i, " - "Dim: %ix%ix%i, LastLevel: %i, Size: %i, Format: %s\n", - tex->macrotile ? "YES" : " NO", - tex->microtile ? "YES" : " NO", - tex->hwpitch[0], - base->width0, base->height0, base->depth0, base->last_level, - tex->size, - util_format_short_name(base->format)); + if (SCREEN_DBG_ON(rscreen, DBG_TEX)) { + r300_tex_print_info(rscreen, tex, "texture_create"); + } tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? 
R300_DOMAIN_GTT : @@ -1084,7 +1093,7 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, else surface->cbzb_format = R300_DEPTHFORMAT_16BIT_INT_Z; - SCREEN_DBG(r300_screen(screen), DBG_TEX, + SCREEN_DBG(r300_screen(screen), DBG_CBZB, "CBZB Dim: %ix%i, Misalignment: %i, Macro: %s\n", surface->cbzb_width, surface->cbzb_height, offset & 2047, @@ -1144,14 +1153,6 @@ r300_texture_from_handle(struct pipe_screen* screen, rws->buffer_get_tiling(rws, buffer, &tex->microtile, &tex->macrotile); r300_setup_flags(tex); - SCREEN_DBG(rscreen, DBG_TEX, - "r300: texture_from_handle: Macro: %s, Micro: %s, " - "Pitch: % 4i, Dim: %ix%i, Format: %s\n", - tex->macrotile ? "YES" : " NO", - tex->microtile ? "YES" : " NO", - stride / util_format_get_blocksize(base->format), - base->width0, base->height0, - util_format_short_name(base->format)); /* Enforce microtiled zbuffer. */ override_zb_flags = util_format_is_depth_or_stencil(base->format) && @@ -1184,5 +1185,9 @@ r300_texture_from_handle(struct pipe_screen* screen, tex->microtile, tex->macrotile, tex->pitch[0] * util_format_get_blocksize(tex->b.b.format)); } + + if (SCREEN_DBG_ON(rscreen, DBG_TEX)) + r300_tex_print_info(rscreen, tex, "texture_from_handle"); + return (struct pipe_resource*)tex; } -- cgit v1.2.3 From 065e3f7ff2a9b6170e51b0104036088e8d163ea0 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 24 Jul 2010 03:34:18 +0200 Subject: r300g: reject resources from handles which are not large enough The driver gets a buffer and its size in resource_from_handle. It computes the required minimum buffer size from given texture properties, and compares the two sizes. This is to early detect DDX bugs. 
--- src/gallium/drivers/r300/r300_context.h | 7 ++++++- src/gallium/drivers/r300/r300_texture.c | 18 +++++++++++++++--- src/gallium/drivers/r300/r300_winsys.h | 7 ++++--- src/gallium/winsys/radeon/drm/radeon_drm_buffer.c | 2 +- src/gallium/winsys/radeon/drm/radeon_r300.c | 11 ++++++++--- 5 files changed, 34 insertions(+), 11 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index b9c96d5bdd..7b58587a2a 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -352,9 +352,14 @@ struct r300_texture { */ unsigned stride_override; - /* Total size of this texture, in bytes. */ + /* Total size of this texture, in bytes, + * derived from the texture properties. */ unsigned size; + /* Total size of the buffer backing this texture, in bytes. + * It must be >= size. */ + unsigned buffer_size; + /* Whether this texture has non-power-of-two dimensions * or a user-specified pitch. * It can be either a regular texture or a rectangle one. 
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 176fa76920..711042722c 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -1022,6 +1022,7 @@ struct pipe_resource* r300_texture_create(struct pipe_screen* screen, tex->buffer = rws->buffer_create(rws, tex->size, 2048, base->bind, base->usage, tex->domain); + tex->buffer_size = tex->size; if (!tex->buffer) { FREE(tex); @@ -1120,7 +1121,7 @@ r300_texture_from_handle(struct pipe_screen* screen, struct r300_screen* rscreen = r300_screen(screen); struct r300_winsys_buffer *buffer; struct r300_texture* tex; - unsigned stride; + unsigned stride, size; boolean override_zb_flags; /* Support only 2D textures without mipmaps */ @@ -1130,7 +1131,7 @@ r300_texture_from_handle(struct pipe_screen* screen, return NULL; } - buffer = rws->buffer_from_handle(rws, whandle, &stride); + buffer = rws->buffer_from_handle(rws, whandle, &stride, &size); if (!buffer) { return NULL; } @@ -1150,6 +1151,7 @@ r300_texture_from_handle(struct pipe_screen* screen, /* one ref already taken */ tex->buffer = buffer; + tex->buffer_size = size; rws->buffer_get_tiling(rws, buffer, &tex->microtile, &tex->macrotile); r300_setup_flags(tex); @@ -1186,8 +1188,18 @@ r300_texture_from_handle(struct pipe_screen* screen, tex->pitch[0] * util_format_get_blocksize(tex->b.b.format)); } - if (SCREEN_DBG_ON(rscreen, DBG_TEX)) + /* Make sure the buffer we got is large enough. */ + if (tex->size > tex->buffer_size) { + fprintf(stderr, "r300: texture_from_handle: The buffer is not " + "large enough. 
Got: %i, Need: %i, Info:\n", + tex->buffer_size, tex->size); r300_tex_print_info(rscreen, tex, "texture_from_handle"); + pipe_resource_reference((struct pipe_resource**)&tex, NULL); + return NULL; + } else { + if (SCREEN_DBG_ON(rscreen, DBG_TEX)) + r300_tex_print_info(rscreen, tex, "texture_from_handle"); + } return (struct pipe_resource*)tex; } diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 7e115c2d62..ff11546a64 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -184,12 +184,13 @@ struct r300_winsys_screen { * \param ws The winsys this function is called from. * \param whandle A winsys handle pointer as was received from a state * tracker. - * \param stride A pointer to the stride return variable. - * The stride is in bytes. + * \param stride The returned buffer stride in bytes. + * \param size The returned buffer size. */ struct r300_winsys_buffer *(*buffer_from_handle)(struct r300_winsys_screen *ws, struct winsys_handle *whandle, - unsigned *stride); + unsigned *stride, + unsigned *size); /** * Get a winsys handle from a winsys buffer. 
The internal structure diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c index 5ea5912089..017eac8464 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c @@ -189,7 +189,7 @@ struct pb_buffer *radeon_drm_bufmgr_create_buffer_from_handle(struct pb_manager pipe_reference_init(&buf->base.base.reference, 1); buf->base.base.alignment = 0; buf->base.base.usage = PB_USAGE_GPU_WRITE | PB_USAGE_GPU_READ; - buf->base.base.size = 0; + buf->base.base.size = bo->size; buf->base.vtbl = &radeon_drm_buffer_vtbl; buf->mgr = mgr; diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c index effa27f5c7..5544504067 100644 --- a/src/gallium/winsys/radeon/drm/radeon_r300.c +++ b/src/gallium/winsys/radeon/drm/radeon_r300.c @@ -109,14 +109,19 @@ static void radeon_r300_winsys_buffer_reference(struct r300_winsys_screen *rws, static struct r300_winsys_buffer *radeon_r300_winsys_buffer_from_handle(struct r300_winsys_screen *rws, struct winsys_handle *whandle, - unsigned *stride) + unsigned *stride, + unsigned *size) { struct radeon_libdrm_winsys *ws = radeon_libdrm_winsys(rws); struct pb_buffer *_buf; - *stride = whandle->stride; - _buf = radeon_drm_bufmgr_create_buffer_from_handle(ws->kman, whandle->handle); + + if (stride) + *stride = whandle->stride; + if (size) + *size = _buf->base.size; + return radeon_libdrm_winsys_buffer(_buf); } -- cgit v1.2.3 From d779a5d16ae6a17b3fc0c097f4eb477a80e54566 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 24 Jul 2010 21:32:53 +0200 Subject: r300g: cleanup texture creation code This decouples initializing a texture layout/miptree description from an actual texture creation, it also partially unifies texture_create and texture_from_handle. r300_texture inherits r300_texture_desc, which inherits u_resource. 
The CBZB clear criteria are moved to r300_texture_desc::cbzb_allowed[level]. And other minor cleanups. --- src/gallium/drivers/r300/Makefile | 1 + src/gallium/drivers/r300/SConscript | 1 + src/gallium/drivers/r300/r300_blit.c | 19 +- src/gallium/drivers/r300/r300_context.h | 64 +-- src/gallium/drivers/r300/r300_defines.h | 5 +- src/gallium/drivers/r300/r300_fs.c | 2 +- src/gallium/drivers/r300/r300_state.c | 15 +- src/gallium/drivers/r300/r300_state_derived.c | 8 +- src/gallium/drivers/r300/r300_texture.c | 593 ++++++-------------------- src/gallium/drivers/r300/r300_texture.h | 6 - src/gallium/drivers/r300/r300_texture_desc.c | 456 ++++++++++++++++++++ src/gallium/drivers/r300/r300_texture_desc.h | 57 +++ src/gallium/drivers/r300/r300_transfer.c | 49 ++- 13 files changed, 733 insertions(+), 543 deletions(-) create mode 100644 src/gallium/drivers/r300/r300_texture_desc.c create mode 100644 src/gallium/drivers/r300/r300_texture_desc.h (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index 13152635a6..728bc40a5b 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -24,6 +24,7 @@ C_SOURCES = \ r300_vs.c \ r300_vs_draw.c \ r300_texture.c \ + r300_texture_desc.c \ r300_tgsi_to_rc.c \ r300_transfer.c diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index 552ed4e5be..bf023daaa5 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -34,6 +34,7 @@ r300 = env.ConvenienceLibrary( 'r300_vs.c', 'r300_vs_draw.c', 'r300_texture.c', + 'r300_texture_desc.c', 'r300_tgsi_to_rc.c', 'r300_transfer.c', ] + r300compiler) + r300compiler diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 895efaa1c4..d125196b6d 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -97,29 +97,12 @@ static boolean r300_cbzb_clear_allowed(struct 
r300_context *r300, { struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - struct r300_surface *surf = r300_surface(fb->cbufs[0]); - unsigned bpp; /* Only color clear allowed, and only one colorbuffer. */ if (clear_buffers != PIPE_CLEAR_COLOR || fb->nr_cbufs != 1) return FALSE; - /* The colorbuffer must be point-sampled. */ - if (surf->base.texture->nr_samples > 1) - return FALSE; - - bpp = util_format_get_blocksizebits(surf->base.format); - - /* ZB can only work with the two pixel sizes. */ - if (bpp != 16 && bpp != 32) - return FALSE; - - /* If the midpoint ZB offset is not aligned to 2048, it returns garbage - * with certain texture sizes. Macrotiling ensures the alignment. */ - if (!r300_texture(surf->base.texture)->mip_macrotile[surf->base.level]) - return FALSE; - - return TRUE; + return r300_surface(fb->cbufs[0])->cbzb_allowed; } /* Clear currently bound buffers. */ diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 7b58587a2a..06e4e12558 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -318,29 +318,38 @@ struct r300_surface { uint32_t cbzb_midpoint_offset; /* DEPTHOFFSET. */ uint32_t cbzb_pitch; /* DEPTHPITCH. */ uint32_t cbzb_format; /* ZB_FORMAT. */ + + /* Whether the CBZB clear is allowed on the surface. */ + boolean cbzb_allowed; }; -struct r300_texture { - /* Parent class */ +struct r300_texture_desc { + /* Parent class. */ struct u_resource b; - enum r300_buffer_domain domain; + /* Buffer tiling. + * Macrotiling is specified per-level because small mipmaps cannot + * be macrotiled. */ + enum r300_buffer_tiling microtile; + enum r300_buffer_tiling macrotile[R300_MAX_TEXTURE_LEVELS]; /* Offsets into the buffer. 
*/ - unsigned offset[R300_MAX_TEXTURE_LEVELS]; + unsigned offset_in_bytes[R300_MAX_TEXTURE_LEVELS]; - /* A pitch for each mip-level */ - unsigned pitch[R300_MAX_TEXTURE_LEVELS]; + /* Strides for each mip-level. */ + unsigned stride_in_pixels[R300_MAX_TEXTURE_LEVELS]; + unsigned stride_in_bytes[R300_MAX_TEXTURE_LEVELS]; - /* A pitch multiplied by blockwidth as hardware wants - * the number of pixels instead of the number of blocks. */ - unsigned hwpitch[R300_MAX_TEXTURE_LEVELS]; + /* Size of one zslice or face or 2D image based on the texture target. */ + unsigned layer_size_in_bytes[R300_MAX_TEXTURE_LEVELS]; - /* Size of one zslice or face based on the texture target */ - unsigned layer_size[R300_MAX_TEXTURE_LEVELS]; + /* Total size of this texture, in bytes, + * derived from the texture properties. */ + unsigned size_in_bytes; - /* Whether the mipmap level is macrotiled. */ - enum r300_buffer_tiling mip_macrotile[R300_MAX_TEXTURE_LEVELS]; + /* Total size of the buffer backing this texture, in bytes. + * It must be >= size. */ + unsigned buffer_size_in_bytes; /** * If non-zero, override the natural texture layout with @@ -350,21 +359,25 @@ struct r300_texture { * * \sa r300_texture_get_stride */ - unsigned stride_override; - - /* Total size of this texture, in bytes, - * derived from the texture properties. */ - unsigned size; - - /* Total size of the buffer backing this texture, in bytes. - * It must be >= size. */ - unsigned buffer_size; + unsigned stride_in_bytes_override; /* Whether this texture has non-power-of-two dimensions - * or a user-specified pitch. + * or a user-specified stride. * It can be either a regular texture or a rectangle one. + * + * This flag says that hardware must use the stride for addressing + * instead of the width. */ - boolean uses_pitch; + boolean uses_stride_addressing; + + /* Whether CBZB fast color clear is allowed on the miplevel. 
*/ + boolean cbzb_allowed[R300_MAX_TEXTURE_LEVELS]; +}; + +struct r300_texture { + struct r300_texture_desc desc; + + enum r300_buffer_domain domain; /* Pipe buffer backing this texture. */ struct r300_winsys_buffer *buffer; @@ -375,9 +388,6 @@ struct r300_texture { /* All bits should be filled in. */ struct r300_texture_fb_state fb_state; - /* Buffer tiling */ - enum r300_buffer_tiling microtile, macrotile; - /* This is the level tiling flags were last time set for. * It's used to prevent redundant tiling-flags changes from happening.*/ unsigned surface_level; diff --git a/src/gallium/drivers/r300/r300_defines.h b/src/gallium/drivers/r300/r300_defines.h index d510d80a7b..896aeef395 100644 --- a/src/gallium/drivers/r300/r300_defines.h +++ b/src/gallium/drivers/r300/r300_defines.h @@ -36,7 +36,10 @@ enum r300_buffer_tiling { R300_BUFFER_LINEAR = 0, R300_BUFFER_TILED, - R300_BUFFER_SQUARETILED + R300_BUFFER_SQUARETILED, + + R300_BUFFER_UNKNOWN, + R300_BUFFER_SELECT_LAYOUT = R300_BUFFER_UNKNOWN }; enum r300_buffer_domain { /* bitfield */ diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index b145ded639..6eac12bfb9 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -173,7 +173,7 @@ static void get_external_state( t = (struct r300_texture*)texstate->sampler_views[i]->base.texture; /* XXX this should probably take into account STR, not just S. */ - if (t->uses_pitch) { + if (t->desc.uses_stride_addressing) { switch (s->state.wrap_s) { case PIPE_TEX_WRAP_REPEAT: state->unit[i].wrap_mode = RC_WRAP_REPEAT; diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index f52265b1c0..6e2a6ca0e4 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -619,7 +619,8 @@ static void r300_tex_set_tiling_flags(struct r300_context *r300, { /* Check if the macrotile flag needs to be changed. * Skip changing the flags otherwise. 
*/ - if (tex->mip_macrotile[tex->surface_level] != tex->mip_macrotile[level]) { + if (tex->desc.macrotile[tex->surface_level] != + tex->desc.macrotile[level]) { /* Tiling determines how DRM treats the buffer data. * We must flush CS when changing it if the buffer is referenced. */ if (r300->rws->cs_is_buffer_referenced(r300->cs, @@ -627,8 +628,8 @@ static void r300_tex_set_tiling_flags(struct r300_context *r300, r300->context.flush(&r300->context, 0, NULL); r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->microtile, tex->mip_macrotile[level], - tex->pitch[0] * util_format_get_blocksize(tex->b.b.format)); + tex->desc.microtile, tex->desc.macrotile[level], + tex->desc.stride_in_bytes[0]); tex->surface_level = level; } @@ -670,8 +671,10 @@ static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index, surf->zslice, surf->face, surf->level, util_format_short_name(surf->format), - rtex->macrotile ? "YES" : " NO", rtex->microtile ? "YES" : " NO", - rtex->hwpitch[0], tex->width0, tex->height0, tex->depth0, + rtex->desc.macrotile[0] ? "YES" : " NO", + rtex->desc.microtile ? "YES" : " NO", + rtex->desc.stride_in_pixels[0], + tex->width0, tex->height0, tex->depth0, tex->last_level, util_format_short_name(tex->format)); } @@ -1293,7 +1296,7 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, /* Set the texrect factor in the fragment shader. * Needed for RECT and NPOT fallback. */ texture = r300_texture(views[i]->texture); - if (texture->uses_pitch) { + if (texture->desc.uses_stride_addressing) { r300->fs_rc_constant_state.dirty = TRUE; } diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 2ef9766578..e20d8d0fdf 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -567,7 +567,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) * are stored in the format. 
* Otherwise, swizzles must be applied after the compare mode * in the fragment shader. */ - if (util_format_is_depth_or_stencil(tex->b.b.format)) { + if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) { if (sampler->state.compare_mode == PIPE_TEX_COMPARE_NONE) { texstate->format.format1 |= r300_get_swizzle_combined(depth_swizzle, view->swizzle); @@ -578,12 +578,12 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) } /* to emulate 1D textures through 2D ones correctly */ - if (tex->b.b.target == PIPE_TEXTURE_1D) { + if (tex->desc.b.b.target == PIPE_TEXTURE_1D) { texstate->filter0 &= ~R300_TX_WRAP_T_MASK; texstate->filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); } - if (tex->uses_pitch) { + if (tex->desc.uses_stride_addressing) { /* NPOT textures don't support mip filter, unfortunately. * This prevents incorrect rendering. */ texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK; @@ -610,7 +610,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) /* determine min/max levels */ /* the MAX_MIP level is the largest (finest) one */ max_level = MIN3(sampler->max_lod + view->base.first_level, - tex->b.b.last_level, view->base.last_level); + tex->desc.b.b.last_level, view->base.last_level); min_level = MIN2(sampler->min_lod + view->base.first_level, max_level); texstate->format.format0 |= R300_TX_NUM_LEVELS(max_level); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 711042722c..e99a4630ee 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -26,6 +26,7 @@ #include "r300_context.h" #include "r300_reg.h" +#include "r300_texture_desc.h" #include "r300_transfer.h" #include "r300_screen.h" #include "r300_winsys.h" @@ -37,11 +38,6 @@ #include "pipe/p_screen.h" -enum r300_dim { - DIM_WIDTH = 0, - DIM_HEIGHT = 1 -}; - unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, const unsigned char *swizzle_view) { @@ 
-544,17 +540,17 @@ static void r300_texture_setup_immutable_state(struct r300_screen* screen, struct r300_texture* tex) { struct r300_texture_format_state* f = &tex->tx_format; - struct pipe_resource *pt = &tex->b.b; + struct pipe_resource *pt = &tex->desc.b.b; boolean is_r500 = screen->caps.is_r500; /* Set sampler state. */ f->format0 = R300_TX_WIDTH((pt->width0 - 1) & 0x7ff) | R300_TX_HEIGHT((pt->height0 - 1) & 0x7ff); - if (tex->uses_pitch) { + if (tex->desc.uses_stride_addressing) { /* rectangles love this */ f->format0 |= R300_TX_PITCH_EN; - f->format2 = (tex->hwpitch[0] - 1) & 0x1fff; + f->format2 = (tex->desc.stride_in_pixels[0] - 1) & 0x1fff; } else { /* power of two textures (3D, mipmaps, and no pitch) */ f->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth0) & 0xf); @@ -579,8 +575,8 @@ static void r300_texture_setup_immutable_state(struct r300_screen* screen, } } - f->tile_config = R300_TXO_MACRO_TILE(tex->macrotile) | - R300_TXO_MICRO_TILE(tex->microtile); + f->tile_config = R300_TXO_MACRO_TILE(tex->desc.macrotile[0]) | + R300_TXO_MICRO_TILE(tex->desc.microtile); } static void r300_texture_setup_fb_state(struct r300_screen* screen, @@ -589,23 +585,23 @@ static void r300_texture_setup_fb_state(struct r300_screen* screen, unsigned i; /* Set framebuffer state. 
*/ - if (util_format_is_depth_or_stencil(tex->b.b.format)) { - for (i = 0; i <= tex->b.b.last_level; i++) { + if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) { + for (i = 0; i <= tex->desc.b.b.last_level; i++) { tex->fb_state.pitch[i] = - tex->hwpitch[i] | - R300_DEPTHMACROTILE(tex->mip_macrotile[i]) | - R300_DEPTHMICROTILE(tex->microtile); + tex->desc.stride_in_pixels[i] | + R300_DEPTHMACROTILE(tex->desc.macrotile[i]) | + R300_DEPTHMICROTILE(tex->desc.microtile); } - tex->fb_state.format = r300_translate_zsformat(tex->b.b.format); + tex->fb_state.format = r300_translate_zsformat(tex->desc.b.b.format); } else { - for (i = 0; i <= tex->b.b.last_level; i++) { + for (i = 0; i <= tex->desc.b.b.last_level; i++) { tex->fb_state.pitch[i] = - tex->hwpitch[i] | - r300_translate_colorformat(tex->b.b.format) | - R300_COLOR_TILE(tex->mip_macrotile[i]) | - R300_COLOR_MICROTILE(tex->microtile); + tex->desc.stride_in_pixels[i] | + r300_translate_colorformat(tex->desc.b.b.format) | + R300_COLOR_TILE(tex->desc.macrotile[i]) | + R300_COLOR_MICROTILE(tex->desc.microtile); } - tex->fb_state.format = r300_translate_out_fmt(tex->b.b.format); + tex->fb_state.format = r300_translate_out_fmt(tex->desc.b.b.format); } } @@ -625,286 +621,6 @@ void r300_texture_reinterpret_format(struct pipe_screen *screen, r300_texture_setup_fb_state(r300_screen(screen), r300_texture(tex)); } -unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, - unsigned zslice, unsigned face) -{ - unsigned offset = tex->offset[level]; - - switch (tex->b.b.target) { - case PIPE_TEXTURE_3D: - assert(face == 0); - return offset + zslice * tex->layer_size[level]; - - case PIPE_TEXTURE_CUBE: - assert(zslice == 0); - return offset + face * tex->layer_size[level]; - - default: - assert(zslice == 0 && face == 0); - return offset; - } -} - -/* Returns the number of pixels that the texture should be aligned to - * in the given dimension. 
*/ -static unsigned r300_get_pixel_alignment(struct r300_texture *tex, - enum r300_buffer_tiling macrotile, - enum r300_dim dim) -{ - static const unsigned table[2][5][3][2] = - { - { - /* Macro: linear linear linear - Micro: linear tiled square-tiled */ - {{ 32, 1}, { 8, 4}, { 0, 0}}, /* 8 bits per pixel */ - {{ 16, 1}, { 8, 2}, { 4, 4}}, /* 16 bits per pixel */ - {{ 8, 1}, { 4, 2}, { 0, 0}}, /* 32 bits per pixel */ - {{ 4, 1}, { 0, 0}, { 2, 2}}, /* 64 bits per pixel */ - {{ 2, 1}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ - }, - { - /* Macro: tiled tiled tiled - Micro: linear tiled square-tiled */ - {{256, 8}, {64, 32}, { 0, 0}}, /* 8 bits per pixel */ - {{128, 8}, {64, 16}, {32, 32}}, /* 16 bits per pixel */ - {{ 64, 8}, {32, 16}, { 0, 0}}, /* 32 bits per pixel */ - {{ 32, 8}, { 0, 0}, {16, 16}}, /* 64 bits per pixel */ - {{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ - } - }; - static const unsigned aa_block[2] = {4, 8}; - unsigned res = 0; - unsigned pixsize = util_format_get_blocksize(tex->b.b.format); - - assert(macrotile <= R300_BUFFER_TILED); - assert(tex->microtile <= R300_BUFFER_SQUARETILED); - assert(pixsize <= 16); - assert(dim <= DIM_HEIGHT); - - if (tex->b.b.nr_samples > 1) { - /* Multisampled textures have their own alignment scheme. */ - if (pixsize == 4) - res = aa_block[dim]; - } else { - /* Standard alignment. */ - res = table[macrotile][util_logbase2(pixsize)][tex->microtile][dim]; - } - - assert(res); - return res; -} - -/* Return true if macrotiling should be enabled on the miplevel. */ -static boolean r300_texture_macro_switch(struct r300_texture *tex, - unsigned level, - boolean rv350_mode, - enum r300_dim dim) -{ - unsigned tile, texdim; - - tile = r300_get_pixel_alignment(tex, R300_BUFFER_TILED, dim); - if (dim == DIM_WIDTH) { - texdim = u_minify(tex->b.b.width0, level); - } else { - texdim = u_minify(tex->b.b.height0, level); - } - - /* See TX_FILTER1_n.MACRO_SWITCH. 
*/ - if (rv350_mode) { - return texdim >= tile; - } else { - return texdim > tile; - } -} - -/** - * Return the stride, in bytes, of the texture images of the given texture - * at the given level. - */ -unsigned r300_texture_get_stride(struct r300_screen* screen, - struct r300_texture* tex, unsigned level) -{ - unsigned tile_width, width, stride; - - if (tex->stride_override) - return tex->stride_override; - - /* Check the level. */ - if (level > tex->b.b.last_level) { - SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n", - __FUNCTION__, level, tex->b.b.last_level); - return 0; - } - - width = u_minify(tex->b.b.width0, level); - - if (util_format_is_plain(tex->b.b.format)) { - tile_width = r300_get_pixel_alignment(tex, tex->mip_macrotile[level], - DIM_WIDTH); - width = align(width, tile_width); - - stride = util_format_get_stride(tex->b.b.format, width); - - /* Some IGPs need a minimum stride of 64 bytes, hmm... - * This doesn't seem to apply to tiled textures, according to r300c. */ - if (!tex->microtile && !tex->mip_macrotile[level] && - (screen->caps.family == CHIP_FAMILY_RS600 || - screen->caps.family == CHIP_FAMILY_RS690 || - screen->caps.family == CHIP_FAMILY_RS740)) { - return stride < 64 ? 64 : stride; - } - - /* The alignment to 32 bytes is sort of implied by the layout... */ - return stride; - } else { - return align(util_format_get_stride(tex->b.b.format, width), 32); - } -} - -static unsigned r300_texture_get_nblocksy(struct r300_texture* tex, - unsigned level) -{ - unsigned height, tile_height; - - height = u_minify(tex->b.b.height0, level); - - if (util_format_is_plain(tex->b.b.format)) { - tile_height = r300_get_pixel_alignment(tex, tex->mip_macrotile[level], - DIM_HEIGHT); - height = align(height, tile_height); - - /* This is needed for the kernel checker, unfortunately. 
*/ - if ((tex->b.b.target != PIPE_TEXTURE_1D && - tex->b.b.target != PIPE_TEXTURE_2D) || - tex->b.b.last_level != 0) { - height = util_next_power_of_two(height); - } - } - - return util_format_get_nblocksy(tex->b.b.format, height); -} - -static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, - struct r300_texture *tex) -{ - /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures - * incorrectly. This is a workaround to prevent CS from being rejected. */ - - unsigned i, size; - - if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) && - tex->b.b.target == PIPE_TEXTURE_3D && - tex->b.b.last_level > 0) { - size = 0; - - for (i = 0; i <= tex->b.b.last_level; i++) { - size += r300_texture_get_stride(screen, tex, i) * - r300_texture_get_nblocksy(tex, i); - } - - size *= tex->b.b.depth0; - tex->size = size; - } -} - -static void r300_setup_miptree(struct r300_screen* screen, - struct r300_texture* tex) -{ - struct pipe_resource* base = &tex->b.b; - unsigned stride, size, layer_size, nblocksy, i; - boolean rv350_mode = screen->caps.is_rv350; - - SCREEN_DBG(screen, DBG_TEXALLOC, - "r300: Making miptree for texture, format %s\n", - util_format_short_name(base->format)); - - for (i = 0; i <= base->last_level; i++) { - /* Let's see if this miplevel can be macrotiled. */ - tex->mip_macrotile[i] = - (tex->macrotile == R300_BUFFER_TILED && - r300_texture_macro_switch(tex, i, rv350_mode, DIM_WIDTH) && - r300_texture_macro_switch(tex, i, rv350_mode, DIM_HEIGHT)) ? 
- R300_BUFFER_TILED : R300_BUFFER_LINEAR; - - stride = r300_texture_get_stride(screen, tex, i); - nblocksy = r300_texture_get_nblocksy(tex, i); - layer_size = stride * nblocksy; - - if (base->nr_samples) { - layer_size *= base->nr_samples; - } - - if (base->target == PIPE_TEXTURE_CUBE) - size = layer_size * 6; - else - size = layer_size * u_minify(base->depth0, i); - - tex->offset[i] = tex->size; - tex->size = tex->offset[i] + size; - tex->layer_size[i] = layer_size; - tex->pitch[i] = stride / util_format_get_blocksize(base->format); - tex->hwpitch[i] = - tex->pitch[i] * util_format_get_blockwidth(base->format); - - SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d " - "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", - i, u_minify(base->width0, i), u_minify(base->height0, i), - u_minify(base->depth0, i), stride, tex->size, - tex->mip_macrotile[i] ? "TRUE" : "FALSE"); - } -} - -static void r300_setup_flags(struct r300_texture* tex) -{ - tex->uses_pitch = !util_is_power_of_two(tex->b.b.width0) || - !util_is_power_of_two(tex->b.b.height0) || - tex->stride_override; -} - -static void r300_setup_tiling(struct pipe_screen *screen, - struct r300_texture *tex) -{ - struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; - enum pipe_format format = tex->b.b.format; - boolean rv350_mode = r300_screen(screen)->caps.is_rv350; - boolean is_zb = util_format_is_depth_or_stencil(format); - boolean dbg_no_tiling = SCREEN_DBG_ON(r300_screen(screen), DBG_NO_TILING); - - if (!util_format_is_plain(format)) { - return; - } - - /* If height == 1, disable microtiling except for zbuffer. */ - if (!is_zb && (tex->b.b.height0 == 1 || dbg_no_tiling)) { - return; - } - - /* Set microtiling. 
*/ - switch (util_format_get_blocksize(format)) { - case 1: - case 4: - tex->microtile = R300_BUFFER_TILED; - break; - - case 2: - case 8: - if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) { - tex->microtile = R300_BUFFER_SQUARETILED; - } - break; - } - - if (dbg_no_tiling) { - return; - } - - /* Set macrotiling. */ - if (r300_texture_macro_switch(tex, 0, rv350_mode, DIM_WIDTH) && - r300_texture_macro_switch(tex, 0, rv350_mode, DIM_HEIGHT)) { - tex->macrotile = R300_BUFFER_TILED; - } -} - static unsigned r300_texture_is_referenced(struct pipe_context *context, struct pipe_resource *texture, unsigned face, unsigned level) @@ -941,11 +657,10 @@ static boolean r300_texture_get_handle(struct pipe_screen* screen, } return rws->buffer_get_handle(rws, tex->buffer, - r300_texture_get_stride(r300_screen(screen), tex, 0), - whandle); + tex->desc.stride_in_bytes[0], whandle); } -struct u_resource_vtbl r300_texture_vtbl = +struct u_resource_vtbl r300_texture_vtbl = { r300_texture_get_handle, /* get_handle */ r300_texture_destroy, /* resource_destroy */ @@ -958,32 +673,69 @@ struct u_resource_vtbl r300_texture_vtbl = u_default_transfer_inline_write /* transfer_inline_write */ }; -static void r300_tex_print_info(struct r300_screen *rscreen, struct r300_texture *tex, - const char *func) +/* The common texture constructor. */ +static struct r300_texture* +r300_texture_create_object(struct r300_screen *rscreen, + const struct pipe_resource *base, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + unsigned stride_in_bytes_override, + unsigned max_buffer_size, + struct r300_winsys_buffer *buffer) { - fprintf(stderr, - "r300: %s: Macro: %s, Micro: %s, Pitch: %i, Dim: %ix%ix%i, " - "LastLevel: %i, Size: %i, Format: %s\n", - func, - tex->macrotile ? "YES" : " NO", - tex->microtile ? 
"YES" : " NO", - tex->hwpitch[0], - tex->b.b.width0, tex->b.b.height0, tex->b.b.depth0, - tex->b.b.last_level, tex->size, - util_format_short_name(tex->b.b.format)); + struct r300_winsys_screen *rws = rscreen->rws; + struct r300_texture *tex = CALLOC_STRUCT(r300_texture); + if (!tex) { + if (buffer) + rws->buffer_reference(rws, &buffer, NULL); + return NULL; + } + + /* Initialize the descriptor. */ + if (!r300_texture_desc_init(rscreen, &tex->desc, base, + microtile, macrotile, + stride_in_bytes_override, + max_buffer_size)) { + if (buffer) + rws->buffer_reference(rws, &buffer, NULL); + FREE(tex); + return NULL; + } + /* Initialize the hardware state. */ + r300_texture_setup_immutable_state(rscreen, tex); + r300_texture_setup_fb_state(rscreen, tex); + + tex->desc.b.vtbl = &r300_texture_vtbl; + pipe_reference_init(&tex->desc.b.b.reference, 1); + tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? + R300_DOMAIN_GTT : + R300_DOMAIN_VRAM | R300_DOMAIN_GTT; + tex->buffer = buffer; + + /* Create the backing buffer if needed. */ + if (!tex->buffer) { + tex->buffer = rws->buffer_create(rws, tex->desc.size_in_bytes, 2048, + base->bind, base->usage, tex->domain); + + if (!tex->buffer) { + FREE(tex); + return NULL; + } + } + + rws->buffer_set_tiling(rws, tex->buffer, + tex->desc.microtile, tex->desc.macrotile[0], + tex->desc.stride_in_bytes[0]); + + return tex; } /* Create a new texture. */ -struct pipe_resource* r300_texture_create(struct pipe_screen* screen, - const struct pipe_resource* base) +struct pipe_resource *r300_texture_create(struct pipe_screen *screen, + const struct pipe_resource *base) { - struct r300_texture* tex = CALLOC_STRUCT(r300_texture); - struct r300_screen* rscreen = r300_screen(screen); - struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; - - if (!tex) { - return NULL; - } + struct r300_screen *rscreen = r300_screen(screen); + enum r300_buffer_tiling microtile, macrotile; /* Refuse to create a texture with size 0. 
*/ if (!base->width0 || @@ -993,53 +745,70 @@ struct pipe_resource* r300_texture_create(struct pipe_screen* screen, fprintf(stderr, "r300: texture_create: " "Got invalid texture dimensions: %ix%ix%i\n", base->width0, base->height0, base->depth0); - FREE(tex); return NULL; } - tex->b.b = *base; - tex->b.vtbl = &r300_texture_vtbl; - pipe_reference_init(&tex->b.b.reference, 1); - tex->b.b.screen = screen; - - r300_setup_flags(tex); - if (!(base->flags & R300_RESOURCE_FLAG_TRANSFER) && - !(base->bind & PIPE_BIND_SCANOUT)) { - r300_setup_tiling(screen, tex); + if ((base->flags & R300_RESOURCE_FLAG_TRANSFER) || + (base->bind & PIPE_BIND_SCANOUT)) { + microtile = R300_BUFFER_LINEAR; + macrotile = R300_BUFFER_LINEAR; + } else { + microtile = R300_BUFFER_SELECT_LAYOUT; + macrotile = R300_BUFFER_SELECT_LAYOUT; } - r300_setup_miptree(rscreen, tex); - r300_texture_3d_fix_mipmapping(rscreen, tex); - r300_texture_setup_immutable_state(rscreen, tex); - r300_texture_setup_fb_state(rscreen, tex); - if (SCREEN_DBG_ON(rscreen, DBG_TEX)) { - r300_tex_print_info(rscreen, tex, "texture_create"); + return (struct pipe_resource*) + r300_texture_create_object(rscreen, base, microtile, macrotile, + 0, 0, NULL); +} + +struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, + const struct pipe_resource *base, + struct winsys_handle *whandle) +{ + struct r300_winsys_screen *rws = (struct r300_winsys_screen*)screen->winsys; + struct r300_screen *rscreen = r300_screen(screen); + struct r300_winsys_buffer *buffer; + enum r300_buffer_tiling microtile, macrotile; + unsigned stride, size; + + /* Support only 2D textures without mipmaps */ + if (base->target != PIPE_TEXTURE_2D || + base->depth0 != 1 || + base->last_level != 0) { + return NULL; } - tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? 
- R300_DOMAIN_GTT : - R300_DOMAIN_VRAM | R300_DOMAIN_GTT; + buffer = rws->buffer_from_handle(rws, whandle, &stride, &size); + if (!buffer) + return NULL; - tex->buffer = rws->buffer_create(rws, tex->size, 2048, base->bind, - base->usage, tex->domain); - tex->buffer_size = tex->size; + rws->buffer_get_tiling(rws, buffer, &microtile, &macrotile); - if (!tex->buffer) { - FREE(tex); - return NULL; - } + /* Enforce a microtiled zbuffer. */ + if (util_format_is_depth_or_stencil(base->format) && + microtile == R300_BUFFER_LINEAR) { + switch (util_format_get_blocksize(base->format)) { + case 4: + microtile = R300_BUFFER_TILED; + break; - rws->buffer_set_tiling(rws, tex->buffer, - tex->microtile, tex->macrotile, - tex->pitch[0] * util_format_get_blocksize(tex->b.b.format)); + case 2: + if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) + microtile = R300_BUFFER_SQUARETILED; + break; + } + } - return (struct pipe_resource*)tex; + return (struct pipe_resource*) + r300_texture_create_object(rscreen, base, microtile, macrotile, + stride, size, buffer); } /* Not required to implement u_resource_vtbl, consider moving to another file: */ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, - struct pipe_resource* texture, + struct pipe_resource* texture, unsigned face, unsigned level, unsigned zslice, @@ -1049,7 +818,7 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, struct r300_surface* surface = CALLOC_STRUCT(r300_surface); if (surface) { - uint32_t stride, offset, tile_height; + uint32_t offset, tile_height; pipe_reference_init(&surface->base.reference, 1); pipe_resource_reference(&surface->base.texture, texture); @@ -1068,23 +837,29 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, if (surface->domain & R300_DOMAIN_VRAM) surface->domain &= ~R300_DOMAIN_GTT; - surface->offset = r300_texture_get_offset(tex, level, zslice, face); + surface->offset = r300_texture_get_offset(&tex->desc, + level, zslice, face); 
surface->pitch = tex->fb_state.pitch[level]; surface->format = tex->fb_state.format; /* Parameters for the CBZB clear. */ + surface->cbzb_allowed = tex->desc.cbzb_allowed[level]; surface->cbzb_width = align(surface->base.width, 64); /* Height must be aligned to the size of a tile. */ - tile_height = r300_get_pixel_alignment(tex, tex->mip_macrotile[level], + tile_height = r300_get_pixel_alignment(tex->desc.b.b.format, + tex->desc.b.b.nr_samples, + tex->desc.microtile, + tex->desc.macrotile[level], DIM_HEIGHT); + surface->cbzb_height = align((surface->base.height + 1) / 2, tile_height); /* Offset must be aligned to 2K and must point at the beginning * of a scanline. */ - stride = r300_texture_get_stride(r300_screen(screen), tex, level); - offset = surface->offset + stride * surface->cbzb_height; + offset = surface->offset + + tex->desc.stride_in_bytes[level] * surface->cbzb_height; surface->cbzb_midpoint_offset = offset & ~2047; surface->cbzb_pitch = surface->pitch & 0x1ffffc; @@ -1098,7 +873,7 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, "CBZB Dim: %ix%i, Misalignment: %i, Macro: %s\n", surface->cbzb_width, surface->cbzb_height, offset & 2047, - tex->mip_macrotile[level] ? "YES" : " NO"); + tex->desc.macrotile[level] ? 
"YES" : " NO"); } return &surface->base; @@ -1111,95 +886,3 @@ void r300_tex_surface_destroy(struct pipe_surface* s) pipe_resource_reference(&s->texture, NULL); FREE(s); } - -struct pipe_resource* -r300_texture_from_handle(struct pipe_screen* screen, - const struct pipe_resource* base, - struct winsys_handle *whandle) -{ - struct r300_winsys_screen *rws = (struct r300_winsys_screen*)screen->winsys; - struct r300_screen* rscreen = r300_screen(screen); - struct r300_winsys_buffer *buffer; - struct r300_texture* tex; - unsigned stride, size; - boolean override_zb_flags; - - /* Support only 2D textures without mipmaps */ - if (base->target != PIPE_TEXTURE_2D || - base->depth0 != 1 || - base->last_level != 0) { - return NULL; - } - - buffer = rws->buffer_from_handle(rws, whandle, &stride, &size); - if (!buffer) { - return NULL; - } - - tex = CALLOC_STRUCT(r300_texture); - if (!tex) { - return NULL; - } - - tex->b.b = *base; - tex->b.vtbl = &r300_texture_vtbl; - pipe_reference_init(&tex->b.b.reference, 1); - tex->b.b.screen = screen; - tex->domain = R300_DOMAIN_VRAM; - - tex->stride_override = stride; - - /* one ref already taken */ - tex->buffer = buffer; - tex->buffer_size = size; - - rws->buffer_get_tiling(rws, buffer, &tex->microtile, &tex->macrotile); - r300_setup_flags(tex); - - /* Enforce microtiled zbuffer. */ - override_zb_flags = util_format_is_depth_or_stencil(base->format) && - tex->microtile == R300_BUFFER_LINEAR; - - if (override_zb_flags) { - switch (util_format_get_blocksize(base->format)) { - case 4: - tex->microtile = R300_BUFFER_TILED; - break; - - case 2: - if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) { - tex->microtile = R300_BUFFER_SQUARETILED; - break; - } - /* Pass through. 
*/ - - default: - override_zb_flags = FALSE; - } - } - - r300_setup_miptree(rscreen, tex); - r300_texture_setup_immutable_state(rscreen, tex); - r300_texture_setup_fb_state(rscreen, tex); - - if (override_zb_flags) { - rws->buffer_set_tiling(rws, tex->buffer, - tex->microtile, tex->macrotile, - tex->pitch[0] * util_format_get_blocksize(tex->b.b.format)); - } - - /* Make sure the buffer we got is large enough. */ - if (tex->size > tex->buffer_size) { - fprintf(stderr, "r300: texture_from_handle: The buffer is not " - "large enough. Got: %i, Need: %i, Info:\n", - tex->buffer_size, tex->size); - r300_tex_print_info(rscreen, tex, "texture_from_handle"); - pipe_resource_reference((struct pipe_resource**)&tex, NULL); - return NULL; - } else { - if (SCREEN_DBG_ON(rscreen, DBG_TEX)) - r300_tex_print_info(rscreen, tex, "texture_from_handle"); - } - - return (struct pipe_resource*)tex; -} diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 99e7694254..585036ab3b 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -39,12 +39,6 @@ uint32_t r300_translate_texformat(enum pipe_format format, uint32_t r500_tx_format_msb_bit(enum pipe_format format); -unsigned r300_texture_get_stride(struct r300_screen* screen, - struct r300_texture* tex, unsigned level); - -unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, - unsigned zslice, unsigned face); - void r300_texture_reinterpret_format(struct pipe_screen *screen, struct pipe_resource *tex, enum pipe_format new_format); diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c new file mode 100644 index 0000000000..18a2bd31fd --- /dev/null +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -0,0 +1,456 @@ +/* + * Copyright 2008 Corbin Simpson + * Copyright 2010 Marek Olšák + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this 
software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_texture_desc.h" + +#include "r300_context.h" +#include "r300_winsys.h" + +#include "util/u_format.h" + +/* Returns the number of pixels that the texture should be aligned to + * in the given dimension. 
*/ +unsigned r300_get_pixel_alignment(enum pipe_format format, + unsigned num_samples, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + enum r300_dim dim) +{ + static const unsigned table[2][5][3][2] = + { + { + /* Macro: linear linear linear + Micro: linear tiled square-tiled */ + {{ 32, 1}, { 8, 4}, { 0, 0}}, /* 8 bits per pixel */ + {{ 16, 1}, { 8, 2}, { 4, 4}}, /* 16 bits per pixel */ + {{ 8, 1}, { 4, 2}, { 0, 0}}, /* 32 bits per pixel */ + {{ 4, 1}, { 0, 0}, { 2, 2}}, /* 64 bits per pixel */ + {{ 2, 1}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ + }, + { + /* Macro: tiled tiled tiled + Micro: linear tiled square-tiled */ + {{256, 8}, {64, 32}, { 0, 0}}, /* 8 bits per pixel */ + {{128, 8}, {64, 16}, {32, 32}}, /* 16 bits per pixel */ + {{ 64, 8}, {32, 16}, { 0, 0}}, /* 32 bits per pixel */ + {{ 32, 8}, { 0, 0}, {16, 16}}, /* 64 bits per pixel */ + {{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ + } + }; + static const unsigned aa_block[2] = {4, 8}; + unsigned tile = 0; + unsigned pixsize = util_format_get_blocksize(format); + + assert(macrotile <= R300_BUFFER_TILED); + assert(microtile <= R300_BUFFER_SQUARETILED); + assert(pixsize <= 16); + assert(dim <= DIM_HEIGHT); + + if (num_samples > 1) { + /* Multisampled textures have their own alignment scheme. */ + if (pixsize == 4) + tile = aa_block[dim]; + /* XXX FP16 AA. */ + } else { + /* Standard alignment. */ + tile = table[macrotile][util_logbase2(pixsize)][microtile][dim]; + } + + assert(tile); + return tile; +} + +/* Return true if macrotiling should be enabled on the miplevel. 
*/ +static boolean r300_texture_macro_switch(struct r300_texture_desc *desc, + unsigned level, + boolean rv350_mode, + enum r300_dim dim) +{ + unsigned tile, texdim; + + tile = r300_get_pixel_alignment(desc->b.b.format, desc->b.b.nr_samples, + desc->microtile, R300_BUFFER_TILED, dim); + if (dim == DIM_WIDTH) { + texdim = u_minify(desc->b.b.width0, level); + } else { + texdim = u_minify(desc->b.b.height0, level); + } + + /* See TX_FILTER1_n.MACRO_SWITCH. */ + if (rv350_mode) { + return texdim >= tile; + } else { + return texdim > tile; + } +} + +/** + * Return the stride, in bytes, of the texture image of the given texture + * at the given level. + */ +static unsigned r300_texture_get_stride(struct r300_screen *screen, + struct r300_texture_desc *desc, + unsigned level) +{ + unsigned tile_width, width, stride; + + if (desc->stride_in_bytes_override) + return desc->stride_in_bytes_override; + + /* Check the level. */ + if (level > desc->b.b.last_level) { + SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n", + __FUNCTION__, level, desc->b.b.last_level); + return 0; + } + + width = u_minify(desc->b.b.width0, level); + + if (util_format_is_plain(desc->b.b.format)) { + tile_width = r300_get_pixel_alignment(desc->b.b.format, + desc->b.b.nr_samples, + desc->microtile, + desc->macrotile[level], + DIM_WIDTH); + width = align(width, tile_width); + + stride = util_format_get_stride(desc->b.b.format, width); + + /* Some IGPs need a minimum stride of 64 bytes, hmm... + * This doesn't seem to apply to tiled textures, according to r300c. */ + if (!desc->microtile && !desc->macrotile[level] && + (screen->caps.family == CHIP_FAMILY_RS600 || + screen->caps.family == CHIP_FAMILY_RS690 || + screen->caps.family == CHIP_FAMILY_RS740)) { + return stride < 64 ? 64 : stride; + } + + /* The alignment to 32 bytes is sort of implied by the layout... 
*/ + return stride; + } else { + return align(util_format_get_stride(desc->b.b.format, width), 32); + } +} + +static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, + unsigned level, + boolean align_for_cbzb) +{ + unsigned height, tile_height; + + height = u_minify(desc->b.b.height0, level); + + if (util_format_is_plain(desc->b.b.format)) { + tile_height = r300_get_pixel_alignment(desc->b.b.format, + desc->b.b.nr_samples, + desc->microtile, + desc->macrotile[level], + DIM_HEIGHT); + height = align(height, tile_height); + + /* This is needed for the kernel checker, unfortunately. */ + if ((desc->b.b.target != PIPE_TEXTURE_1D && + desc->b.b.target != PIPE_TEXTURE_2D) || + desc->b.b.last_level != 0) { + height = util_next_power_of_two(height); + } + + /* Allocate an even number of macrotiles for the CBZB clear. + * Do so for 3 or more macrotiles in the Y direction. */ + if (align_for_cbzb && + level == 0 && desc->b.b.last_level == 0 && + desc->macrotile[0] && height >= tile_height * 3) { + height = align(height, tile_height * 2); + } + } + + return util_format_get_nblocksy(desc->b.b.format, height); +} + +static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, + struct r300_texture_desc *desc) +{ + /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures + * incorrectly. This is a workaround to prevent CS from being rejected. 
*/ + + unsigned i, size; + + if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) && + desc->b.b.target == PIPE_TEXTURE_3D && + desc->b.b.last_level > 0) { + size = 0; + + for (i = 0; i <= desc->b.b.last_level; i++) { + size += desc->stride_in_bytes[i] * + r300_texture_get_nblocksy(desc, i, FALSE); + } + + size *= desc->b.b.depth0; + desc->size_in_bytes = size; + } +} + +static void r300_setup_miptree(struct r300_screen *screen, + struct r300_texture_desc *desc) +{ + struct pipe_resource *base = &desc->b.b; + unsigned stride, size, layer_size, nblocksy, i; + boolean rv350_mode = screen->caps.is_rv350; + + SCREEN_DBG(screen, DBG_TEXALLOC, + "r300: Making miptree for texture, format %s\n", + util_format_short_name(base->format)); + + for (i = 0; i <= base->last_level; i++) { + /* Let's see if this miplevel can be macrotiled. */ + desc->macrotile[i] = + (desc->macrotile[0] == R300_BUFFER_TILED && + r300_texture_macro_switch(desc, i, rv350_mode, DIM_WIDTH) && + r300_texture_macro_switch(desc, i, rv350_mode, DIM_HEIGHT)) ? 
+ R300_BUFFER_TILED : R300_BUFFER_LINEAR; + + stride = r300_texture_get_stride(screen, desc, i); + nblocksy = r300_texture_get_nblocksy(desc, i, desc->stride_in_bytes_override == 0); + layer_size = stride * nblocksy; + + if (base->nr_samples) { + layer_size *= base->nr_samples; + } + + if (base->target == PIPE_TEXTURE_CUBE) + size = layer_size * 6; + else + size = layer_size * u_minify(base->depth0, i); + + desc->offset_in_bytes[i] = desc->size_in_bytes; + desc->size_in_bytes = desc->offset_in_bytes[i] + size; + desc->layer_size_in_bytes[i] = layer_size; + desc->stride_in_bytes[i] = stride; + desc->stride_in_pixels[i] = + (stride / util_format_get_blocksize(base->format)) * + util_format_get_blockwidth(base->format); + + SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d " + "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", + i, u_minify(base->width0, i), u_minify(base->height0, i), + u_minify(base->depth0, i), stride, desc->size_in_bytes, + desc->macrotile[i] ? "TRUE" : "FALSE"); + } +} + +static void r300_setup_flags(struct r300_texture_desc *desc) +{ + desc->uses_stride_addressing = + !util_is_power_of_two(desc->b.b.width0) || + !util_is_power_of_two(desc->b.b.height0) || + desc->stride_in_bytes_override; +} + +static void r300_setup_cbzb_flags(struct r300_screen *rscreen, + struct r300_texture_desc *desc) +{ + unsigned i, bpp; + boolean first_level_valid; + + bpp = util_format_get_blocksizebits(desc->b.b.format); + + /* 1) The texture must be point-sampled, + * 2) The depth must be 16 or 32 bits. + * 3) If the midpoint ZB offset is not aligned to 2048, it returns garbage + * with certain texture sizes. Macrotiling ensures the alignment. 
*/ + first_level_valid = desc->b.b.nr_samples <= 1 && + (bpp == 16 || bpp == 32) && + desc->macrotile[0]; + + for (i = 0; i <= desc->b.b.last_level; i++) + desc->cbzb_allowed[i] = first_level_valid && desc->macrotile[i]; + return; +#if 0 + /* When clearing, the layer (width*height) is horizontally split + * into two, and the upper and lower halves are cleared by the CB + * and ZB units, respectively. Therefore, the number of macrotiles + * in the Y direction must be even. */ + + if (desc->b.b.last_level > 0 || + desc->b.b.target == PIPE_TEXTURE_3D || + desc->b.b.target == PIPE_TEXTURE_CUBE) { + /* For mipmapped, 3D, or cube textures, just check if there are + * enough macrotiles per layer. */ + for (i = 0; i <= desc->b.b.last_level; i++) { + desc->cbzb_allowed[i] = FALSE; + + if (first_level_valid && desc->macrotile[i]) { + unsigned height, tile_height, num_macrotiles; + + /* Compute the number of macrotiles in the Y direction. */ + tile_height = r300_get_pixel_alignment(desc->b.b.format, + desc->b.b.nr_samples, + desc->microtile, + R300_BUFFER_TILED, + DIM_HEIGHT); + height = r300_texture_get_height(desc, i); + num_macrotiles = height / tile_height; + + desc->cbzb_allowed[i] = num_macrotiles % 2 == 0; + } + } + } else { + /* For 1D and 2D non-mipmapped textures */ + unsigned layer_size; + + layer_size = desc->stride_in_bytes[0] * + r300_texture_get_nblocksy(desc, 0, TRUE); + } +#endif +} + +static void r300_setup_tiling(struct r300_screen *screen, + struct r300_texture_desc *desc) +{ + struct r300_winsys_screen *rws = screen->rws; + enum pipe_format format = desc->b.b.format; + boolean rv350_mode = screen->caps.is_rv350; + boolean is_zb = util_format_is_depth_or_stencil(format); + boolean dbg_no_tiling = SCREEN_DBG_ON(screen, DBG_NO_TILING); + + if (!util_format_is_plain(format)) { + return; + } + + /* If height == 1, disable microtiling except for zbuffer. */ + if (!is_zb && (desc->b.b.height0 == 1 || dbg_no_tiling)) { + return; + } + + /* Set microtiling. 
*/ + switch (util_format_get_blocksize(format)) { + case 1: + case 4: + desc->microtile = R300_BUFFER_TILED; + break; + + case 2: + case 8: + if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) { + desc->microtile = R300_BUFFER_SQUARETILED; + } + break; + } + + if (dbg_no_tiling) { + return; + } + + /* Set macrotiling. */ + if (r300_texture_macro_switch(desc, 0, rv350_mode, DIM_WIDTH) && + r300_texture_macro_switch(desc, 0, rv350_mode, DIM_HEIGHT)) { + desc->macrotile[0] = R300_BUFFER_TILED; + } +} + +static void r300_tex_print_info(struct r300_screen *rscreen, + struct r300_texture_desc *desc, + const char *func) +{ + fprintf(stderr, + "r300: %s: Macro: %s, Micro: %s, Pitch: %i, Dim: %ix%ix%i, " + "LastLevel: %i, Size: %i, Format: %s\n", + func, + desc->macrotile[0] ? "YES" : " NO", + desc->microtile ? "YES" : " NO", + desc->stride_in_pixels[0], + desc->b.b.width0, desc->b.b.height0, desc->b.b.depth0, + desc->b.b.last_level, desc->size_in_bytes, + util_format_short_name(desc->b.b.format)); +} + +boolean r300_texture_desc_init(struct r300_screen *rscreen, + struct r300_texture_desc *desc, + const struct pipe_resource *base, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + unsigned stride_in_bytes_override, + unsigned max_buffer_size) +{ + desc->b.b = *base; + desc->b.b.screen = &rscreen->screen; + + desc->stride_in_bytes_override = stride_in_bytes_override; + + r300_setup_flags(desc); + + if (microtile == R300_BUFFER_SELECT_LAYOUT || + macrotile == R300_BUFFER_SELECT_LAYOUT) { + r300_setup_tiling(rscreen, desc); + } else { + desc->microtile = microtile; + desc->macrotile[0] = macrotile; + assert(desc->b.b.last_level == 0); + } + + r300_setup_miptree(rscreen, desc); + r300_texture_3d_fix_mipmapping(rscreen, desc); + r300_setup_cbzb_flags(rscreen, desc); + + if (max_buffer_size) { + /* Make sure the buffer we got is large enough. 
*/ + if (desc->size_in_bytes > max_buffer_size) { + fprintf(stderr, "r300: texture_from_handle: The buffer is not " + "large enough. Got: %i, Need: %i, Info:\n", + max_buffer_size, desc->size_in_bytes); + r300_tex_print_info(rscreen, desc, "texture_from_handle"); + return FALSE; + } + + desc->buffer_size_in_bytes = max_buffer_size; + } else { + desc->buffer_size_in_bytes = desc->size_in_bytes; + + } + + if (SCREEN_DBG_ON(rscreen, DBG_TEX)) + r300_tex_print_info(rscreen, desc, "texture_from_handle"); + + return TRUE; +} + +unsigned r300_texture_get_offset(struct r300_texture_desc *desc, + unsigned level, unsigned zslice, + unsigned face) +{ + unsigned offset = desc->offset_in_bytes[level]; + + switch (desc->b.b.target) { + case PIPE_TEXTURE_3D: + assert(face == 0); + return offset + zslice * desc->layer_size_in_bytes[level]; + + case PIPE_TEXTURE_CUBE: + assert(zslice == 0); + return offset + face * desc->layer_size_in_bytes[level]; + + default: + assert(zslice == 0 && face == 0); + return offset; + } +} diff --git a/src/gallium/drivers/r300/r300_texture_desc.h b/src/gallium/drivers/r300/r300_texture_desc.h new file mode 100644 index 0000000000..95de66f654 --- /dev/null +++ b/src/gallium/drivers/r300/r300_texture_desc.h @@ -0,0 +1,57 @@ +/* + * Copyright 2008 Corbin Simpson + * Copyright 2010 Marek Olšák + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_TEXTURE_DESC_H +#define R300_TEXTURE_DESC_H + +#include "r300_defines.h" + +struct pipe_resource; +struct r300_screen; +struct r300_texture_desc; +struct r300_texture; + +enum r300_dim { + DIM_WIDTH = 0, + DIM_HEIGHT = 1 +}; + +unsigned r300_get_pixel_alignment(enum pipe_format format, + unsigned num_samples, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + enum r300_dim dim); + +boolean r300_texture_desc_init(struct r300_screen *rscreen, + struct r300_texture_desc *desc, + const struct pipe_resource *base, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + unsigned stride_in_bytes_override, + unsigned max_buffer_size); + +unsigned r300_texture_get_offset(struct r300_texture_desc *desc, + unsigned level, unsigned zslice, + unsigned face); + +#endif diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index 3cc4c8c958..e9333b35ef 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -22,7 +22,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r300_transfer.h" -#include "r300_texture.h" +#include "r300_texture_desc.h" #include "r300_screen_buffer.h" #include "util/u_memory.h" @@ -35,8 +35,8 @@ struct r300_transfer { /* Offset from start of buffer. */ unsigned offset; - /* Detiled texture. */ - struct r300_texture *detiled_texture; + /* Linear texture. 
*/ + struct r300_texture *linear_texture; }; /* Convenience cast wrapper. */ @@ -57,7 +57,7 @@ static void r300_copy_from_tiled_texture(struct pipe_context *ctx, subdst.face = 0; subdst.level = 0; - ctx->resource_copy_region(ctx, &r300transfer->detiled_texture->b.b, subdst, + ctx->resource_copy_region(ctx, &r300transfer->linear_texture->desc.b.b, subdst, 0, 0, 0, tex, transfer->sr, transfer->box.x, transfer->box.y, transfer->box.z, @@ -77,7 +77,7 @@ static void r300_copy_into_tiled_texture(struct pipe_context *ctx, ctx->resource_copy_region(ctx, tex, transfer->sr, transfer->box.x, transfer->box.y, transfer->box.z, - &r300transfer->detiled_texture->b.b, subsrc, + &r300transfer->linear_texture->desc.b.b, subsrc, 0, 0, 0, transfer->box.width, transfer->box.height); @@ -93,7 +93,6 @@ r300_texture_get_transfer(struct pipe_context *ctx, { struct r300_context *r300 = r300_context(ctx); struct r300_texture *tex = r300_texture(texture); - struct r300_screen *r300screen = r300_screen(ctx->screen); struct r300_transfer *trans; struct pipe_resource base; boolean referenced_cs, referenced_hw, blittable; @@ -124,7 +123,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, /* If the texture is tiled, we must create a temporary detiled texture * for this transfer. * Also make write transfers pipelined. */ - if (tex->microtile || tex->macrotile || + if (tex->desc.microtile || tex->desc.macrotile[sr.level] || ((referenced_hw & !(usage & PIPE_TRANSFER_READ)) && blittable)) { base.target = PIPE_TEXTURE_2D; base.format = texture->format; @@ -149,23 +148,23 @@ r300_texture_get_transfer(struct pipe_context *ctx, } /* Create the temporary texture. */ - trans->detiled_texture = r300_texture( + trans->linear_texture = r300_texture( ctx->screen->resource_create(ctx->screen, &base)); - if (!trans->detiled_texture) { + if (!trans->linear_texture) { /* Oh crap, the thing can't create the texture. * Let's flush and try again. 
*/ ctx->flush(ctx, 0, NULL); - trans->detiled_texture = r300_texture( + trans->linear_texture = r300_texture( ctx->screen->resource_create(ctx->screen, &base)); - if (!trans->detiled_texture) { + if (!trans->linear_texture) { /* For linear textures, it's safe to fallback to * an unpipelined transfer. */ - if (!tex->microtile && !tex->macrotile) { + if (!tex->desc.microtile && !tex->desc.macrotile[sr.level]) { goto unpipelined; } @@ -177,8 +176,8 @@ r300_texture_get_transfer(struct pipe_context *ctx, } } - assert(!trans->detiled_texture->microtile && - !trans->detiled_texture->macrotile); + assert(!trans->linear_texture->desc.microtile && + !trans->linear_texture->desc.macrotile[0]); /* Set the stride. * @@ -188,7 +187,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, * right thing internally. */ trans->transfer.stride = - r300_texture_get_stride(r300screen, trans->detiled_texture, 0); + trans->linear_texture->desc.stride_in_bytes[0]; if (usage & PIPE_TRANSFER_READ) { /* We cannot map a tiled texture directly because the data is @@ -203,9 +202,9 @@ r300_texture_get_transfer(struct pipe_context *ctx, unpipelined: /* Unpipelined transfer. 
*/ - trans->transfer.stride = - r300_texture_get_stride(r300screen, tex, sr.level); - trans->offset = r300_texture_get_offset(tex, sr.level, box->z, sr.face); + trans->transfer.stride = tex->desc.stride_in_bytes[sr.level]; + trans->offset = r300_texture_get_offset(&tex->desc, + sr.level, box->z, sr.face); if (referenced_cs) ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); @@ -219,13 +218,13 @@ void r300_texture_transfer_destroy(struct pipe_context *ctx, { struct r300_transfer *r300transfer = r300_transfer(trans); - if (r300transfer->detiled_texture) { + if (r300transfer->linear_texture) { if (trans->usage & PIPE_TRANSFER_WRITE) { r300_copy_into_tiled_texture(ctx, r300transfer); } pipe_resource_reference( - (struct pipe_resource**)&r300transfer->detiled_texture, NULL); + (struct pipe_resource**)&r300transfer->linear_texture, NULL); } pipe_resource_reference(&trans->resource, NULL); FREE(trans); @@ -239,13 +238,13 @@ void* r300_texture_transfer_map(struct pipe_context *ctx, struct r300_transfer *r300transfer = r300_transfer(transfer); struct r300_texture *tex = r300_texture(transfer->resource); char *map; - enum pipe_format format = tex->b.b.format; + enum pipe_format format = tex->desc.b.b.format; - if (r300transfer->detiled_texture) { + if (r300transfer->linear_texture) { /* The detiled texture is of the same size as the region being mapped * (no offset needed). 
*/ return rws->buffer_map(rws, - r300transfer->detiled_texture->buffer, + r300transfer->linear_texture->buffer, r300->cs, transfer->usage); } else { @@ -270,8 +269,8 @@ void r300_texture_transfer_unmap(struct pipe_context *ctx, struct r300_transfer *r300transfer = r300_transfer(transfer); struct r300_texture *tex = r300_texture(transfer->resource); - if (r300transfer->detiled_texture) { - rws->buffer_unmap(rws, r300transfer->detiled_texture->buffer); + if (r300transfer->linear_texture) { + rws->buffer_unmap(rws, r300transfer->linear_texture->buffer); } else { rws->buffer_unmap(rws, tex->buffer); } -- cgit v1.2.3 From c92d232061c1aef6f5f56cbd815625778db2fd8c Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 24 Jul 2010 22:52:01 +0200 Subject: r300g: do not use TXPITCH_EN for power-of-two textures from the DDX We were using TXPITCH_EN for textures from the DDX since ever, for nothing. --- src/gallium/drivers/r300/r300_texture_desc.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index 18a2bd31fd..becaa59bea 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -210,6 +210,14 @@ static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, } } +/* Get a width in pixels from a stride in bytes. 
*/ +static unsigned stride_to_width(enum pipe_format format, + unsigned stride_in_bytes) +{ + return (stride_in_bytes / util_format_get_blocksize(format)) * + util_format_get_blockwidth(format); +} + static void r300_setup_miptree(struct r300_screen *screen, struct r300_texture_desc *desc) { @@ -246,9 +254,7 @@ static void r300_setup_miptree(struct r300_screen *screen, desc->size_in_bytes = desc->offset_in_bytes[i] + size; desc->layer_size_in_bytes[i] = layer_size; desc->stride_in_bytes[i] = stride; - desc->stride_in_pixels[i] = - (stride / util_format_get_blocksize(base->format)) * - util_format_get_blockwidth(base->format); + desc->stride_in_pixels[i] = stride_to_width(desc->b.b.format, stride); SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d " "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", @@ -261,9 +267,11 @@ static void r300_setup_miptree(struct r300_screen *screen, static void r300_setup_flags(struct r300_texture_desc *desc) { desc->uses_stride_addressing = - !util_is_power_of_two(desc->b.b.width0) || - !util_is_power_of_two(desc->b.b.height0) || - desc->stride_in_bytes_override; + !util_is_power_of_two(desc->b.b.width0) || + !util_is_power_of_two(desc->b.b.height0) || + (desc->stride_in_bytes_override && + stride_to_width(desc->b.b.format, + desc->stride_in_bytes_override) != desc->b.b.width0); } static void r300_setup_cbzb_flags(struct r300_screen *rscreen, -- cgit v1.2.3 From 49330fc5ac13e25cb201e62995329cffaf5046f0 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 24 Jul 2010 23:05:40 +0200 Subject: r300g: do not use TXPITCH_EN if the width is POT and the height is NPOT --- src/gallium/drivers/r300/r300_context.h | 13 ++++++------- src/gallium/drivers/r300/r300_fs.c | 2 +- src/gallium/drivers/r300/r300_state.c | 2 +- src/gallium/drivers/r300/r300_state_derived.c | 2 +- src/gallium/drivers/r300/r300_texture.c | 3 ++- src/gallium/drivers/r300/r300_texture_desc.c | 4 ++++ 6 files changed, 15 insertions(+), 11 
deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 06e4e12558..b4256c6278 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -361,13 +361,12 @@ struct r300_texture_desc { */ unsigned stride_in_bytes_override; - /* Whether this texture has non-power-of-two dimensions - * or a user-specified stride. - * It can be either a regular texture or a rectangle one. - * - * This flag says that hardware must use the stride for addressing - * instead of the width. - */ + /* Whether this texture has non-power-of-two dimensions. + * It can be either a regular texture or a rectangle one. */ + boolean is_npot; + + /* This flag says that hardware must use the stride for addressing + * instead of the width. */ boolean uses_stride_addressing; /* Whether CBZB fast color clear is allowed on the miplevel. */ diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 6eac12bfb9..db5269912e 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -173,7 +173,7 @@ static void get_external_state( t = (struct r300_texture*)texstate->sampler_views[i]->base.texture; /* XXX this should probably take into account STR, not just S. */ - if (t->desc.uses_stride_addressing) { + if (t->desc.is_npot) { switch (s->state.wrap_s) { case PIPE_TEX_WRAP_REPEAT: state->unit[i].wrap_mode = RC_WRAP_REPEAT; diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 6e2a6ca0e4..bbea7e1589 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1296,7 +1296,7 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, /* Set the texrect factor in the fragment shader. * Needed for RECT and NPOT fallback. 
*/ texture = r300_texture(views[i]->texture); - if (texture->desc.uses_stride_addressing) { + if (texture->desc.is_npot) { r300->fs_rc_constant_state.dirty = TRUE; } diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index e20d8d0fdf..48912e1555 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -583,7 +583,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) texstate->filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); } - if (tex->desc.uses_stride_addressing) { + if (tex->desc.is_npot) { /* NPOT textures don't support mip filter, unfortunately. * This prevents incorrect rendering. */ texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK; diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index e99a4630ee..f1118dfd7d 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -552,7 +552,8 @@ static void r300_texture_setup_immutable_state(struct r300_screen* screen, f->format0 |= R300_TX_PITCH_EN; f->format2 = (tex->desc.stride_in_pixels[0] - 1) & 0x1fff; } else { - /* power of two textures (3D, mipmaps, and no pitch) */ + /* Power of two textures (3D, mipmaps, and no pitch), + * also NPOT textures with a width being POT. 
*/ f->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth0) & 0xf); } diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index becaa59bea..02591aa01f 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -272,6 +272,10 @@ static void r300_setup_flags(struct r300_texture_desc *desc) (desc->stride_in_bytes_override && stride_to_width(desc->b.b.format, desc->stride_in_bytes_override) != desc->b.b.width0); + + desc->is_npot = + desc->uses_stride_addressing || + !util_is_power_of_two(desc->b.b.height0); } static void r300_setup_cbzb_flags(struct r300_screen *rscreen, -- cgit v1.2.3 From 451a0ddb190e5185372fed9ec57d24a822442ecc Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 25 Jul 2010 10:07:12 +0200 Subject: r300g: make sure a texture is large enough for the CBZB clear The number of macrotiles in the Y direction must be even, otherwise memory corruption may happen (e.g. broken fonts). Basically, if we get a buffer in resource_from_handle, we can determine from the buffer size whether it's safe to use the CBZB clear or not. 
--- src/gallium/drivers/r300/r300_texture_desc.c | 101 +++++++++++++-------------- 1 file changed, 49 insertions(+), 52 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index 02591aa01f..343089bf2c 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -154,7 +154,7 @@ static unsigned r300_texture_get_stride(struct r300_screen *screen, static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, unsigned level, - boolean align_for_cbzb) + boolean *out_aligned_for_cbzb) { unsigned height, tile_height; @@ -175,12 +175,28 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, height = util_next_power_of_two(height); } - /* Allocate an even number of macrotiles for the CBZB clear. - * Do so for 3 or more macrotiles in the Y direction. */ - if (align_for_cbzb && - level == 0 && desc->b.b.last_level == 0 && - desc->macrotile[0] && height >= tile_height * 3) { - height = align(height, tile_height * 2); + /* See if the CBZB clear can be used on the buffer, + * taking the texture size into account. */ + if (out_aligned_for_cbzb) { + if (desc->macrotile[level]) { + /* When clearing, the layer (width*height) is horizontally split + * into two, and the upper and lower halves are cleared by the CB + * and ZB units, respectively. Therefore, the number of macrotiles + * in the Y direction must be even. */ + + /* Align the height so that there is an even number of macrotiles. + * Do so for 3 or more macrotiles in the Y direction. 
*/ + if (level == 0 && desc->b.b.last_level == 0 && + (desc->b.b.target == PIPE_TEXTURE_1D || + desc->b.b.target == PIPE_TEXTURE_2D) && + height >= tile_height * 3) { + height = align(height, tile_height * 2); + } + + *out_aligned_for_cbzb = height % (tile_height * 2) == 0; + } else { + *out_aligned_for_cbzb = FALSE; + } } } @@ -219,11 +235,15 @@ static unsigned stride_to_width(enum pipe_format format, } static void r300_setup_miptree(struct r300_screen *screen, - struct r300_texture_desc *desc) + struct r300_texture_desc *desc, + boolean align_for_cbzb) { struct pipe_resource *base = &desc->b.b; unsigned stride, size, layer_size, nblocksy, i; boolean rv350_mode = screen->caps.is_rv350; + boolean aligned_for_cbzb; + + desc->size_in_bytes = 0; SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Making miptree for texture, format %s\n", @@ -238,7 +258,15 @@ static void r300_setup_miptree(struct r300_screen *screen, R300_BUFFER_TILED : R300_BUFFER_LINEAR; stride = r300_texture_get_stride(screen, desc, i); - nblocksy = r300_texture_get_nblocksy(desc, i, desc->stride_in_bytes_override == 0); + + /* Compute the number of blocks in Y, see if the CBZB clear can be + * used on the texture. 
*/ + aligned_for_cbzb = FALSE; + if (align_for_cbzb && desc->cbzb_allowed[i]) + nblocksy = r300_texture_get_nblocksy(desc, i, &aligned_for_cbzb); + else + nblocksy = r300_texture_get_nblocksy(desc, i, NULL); + layer_size = stride * nblocksy; if (base->nr_samples) { @@ -255,6 +283,7 @@ static void r300_setup_miptree(struct r300_screen *screen, desc->layer_size_in_bytes[i] = layer_size; desc->stride_in_bytes[i] = stride; desc->stride_in_pixels[i] = stride_to_width(desc->b.b.format, stride); + desc->cbzb_allowed[i] = desc->cbzb_allowed[i] && aligned_for_cbzb; SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d " "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", @@ -296,44 +325,6 @@ static void r300_setup_cbzb_flags(struct r300_screen *rscreen, for (i = 0; i <= desc->b.b.last_level; i++) desc->cbzb_allowed[i] = first_level_valid && desc->macrotile[i]; - return; -#if 0 - /* When clearing, the layer (width*height) is horizontally split - * into two, and the upper and lower halves are cleared by the CB - * and ZB units, respectively. Therefore, the number of macrotiles - * in the Y direction must be even. */ - - if (desc->b.b.last_level > 0 || - desc->b.b.target == PIPE_TEXTURE_3D || - desc->b.b.target == PIPE_TEXTURE_CUBE) { - /* For mipmapped, 3D, or cube textures, just check if there are - * enough macrotiles per layer. */ - for (i = 0; i <= desc->b.b.last_level; i++) { - desc->cbzb_allowed[i] = FALSE; - - if (first_level_valid && desc->macrotile[i]) { - unsigned height, tile_height, num_macrotiles; - - /* Compute the number of macrotiles in the Y direction. 
*/ - tile_height = r300_get_pixel_alignment(desc->b.b.format, - desc->b.b.nr_samples, - desc->microtile, - R300_BUFFER_TILED, - DIM_HEIGHT); - height = r300_texture_get_height(desc, i); - num_macrotiles = height / tile_height; - - desc->cbzb_allowed[i] = num_macrotiles % 2 == 0; - } - } - } else { - /* For 1D and 2D non-mipmapped textures */ - unsigned layer_size; - - layer_size = desc->stride_in_bytes[0] * - r300_texture_get_nblocksy(desc, 0, TRUE); - } -#endif } static void r300_setup_tiling(struct r300_screen *screen, @@ -409,8 +400,6 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen, desc->stride_in_bytes_override = stride_in_bytes_override; - r300_setup_flags(desc); - if (microtile == R300_BUFFER_SELECT_LAYOUT || macrotile == R300_BUFFER_SELECT_LAYOUT) { r300_setup_tiling(rscreen, desc); @@ -420,10 +409,19 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen, assert(desc->b.b.last_level == 0); } - r300_setup_miptree(rscreen, desc); - r300_texture_3d_fix_mipmapping(rscreen, desc); + r300_setup_flags(desc); r300_setup_cbzb_flags(rscreen, desc); + /* Setup the miptree description. */ + r300_setup_miptree(rscreen, desc, TRUE); + /* If the required buffer size is larger the given max size, + * try again without the alignment for the CBZB clear. */ + if (max_buffer_size && desc->size_in_bytes > max_buffer_size) { + r300_setup_miptree(rscreen, desc, FALSE); + } + + r300_texture_3d_fix_mipmapping(rscreen, desc); + if (max_buffer_size) { /* Make sure the buffer we got is large enough. 
*/ if (desc->size_in_bytes > max_buffer_size) { @@ -437,7 +435,6 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen, desc->buffer_size_in_bytes = max_buffer_size; } else { desc->buffer_size_in_bytes = desc->size_in_bytes; - } if (SCREEN_DBG_ON(rscreen, DBG_TEX)) -- cgit v1.2.3 From 6f2936c654c68388b9c43a189a1b8c06f3a9d241 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 25 Jul 2010 23:40:51 +0200 Subject: r300g: implement D24X8 texture sampling for r3xx-r4xx Because the hw can't sample it, I reinterpret the format as G16R16 and sample the G component. This gives 16 bits of precision, which should be enough for depth texturing (surprisingly, the sampled values are exactly the same as in D16 textures). This also enables EXT_packed_depth_stencil on those old chipsets, finally. --- src/gallium/drivers/r300/r300_screen.c | 4 ---- src/gallium/drivers/r300/r300_state.c | 6 +++-- src/gallium/drivers/r300/r300_state_derived.c | 33 +++++++++++++++++---------- src/gallium/drivers/r300/r300_texture.c | 10 +++++--- src/gallium/drivers/r300/r300_texture.h | 3 ++- 5 files changed, 34 insertions(+), 22 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 5a11b98eb6..676430f5fe 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -257,8 +257,6 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, uint32_t retval = 0; boolean is_r500 = r300_screen(screen)->caps.is_r500; boolean is_r400 = r300_screen(screen)->caps.is_r400; - boolean is_z24 = format == PIPE_FORMAT_X8Z24_UNORM || - format == PIPE_FORMAT_S8_USCALED_Z24_UNORM; boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM || format == PIPE_FORMAT_R10G10B10X2_SNORM || format == PIPE_FORMAT_B10G10R10A2_UNORM || @@ -293,8 +291,6 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, /* Check sampler format support. 
*/ if ((usage & PIPE_BIND_SAMPLER_VIEW) && - /* Z24 cannot be sampled from on non-r5xx. */ - (is_r500 || !is_z24) && /* ATI1N is r5xx-only. */ (is_r500 || !is_ati1n) && /* ATI2N is supported on r4xx-r5xx. */ diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index bbea7e1589..3e221f2e02 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1330,6 +1330,7 @@ r300_create_sampler_view(struct pipe_context *pipe, { struct r300_sampler_view *view = CALLOC_STRUCT(r300_sampler_view); struct r300_texture *tex = r300_texture(texture); + boolean is_r500 = r300_screen(pipe->screen)->caps.is_r500; if (view) { view->base = *templ; @@ -1345,8 +1346,9 @@ r300_create_sampler_view(struct pipe_context *pipe, view->format = tex->tx_format; view->format.format1 |= r300_translate_texformat(templ->format, - view->swizzle); - if (r300_screen(pipe->screen)->caps.is_r500) { + view->swizzle, + is_r500); + if (is_r500) { view->format.format2 |= r500_tx_format_msb_bit(templ->format); } } diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 48912e1555..a85db27064 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -528,15 +528,9 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) struct r300_sampler_state *sampler; struct r300_sampler_view *view; struct r300_texture *tex; - unsigned min_level, max_level, i, size; + unsigned min_level, max_level, i, j, size; unsigned count = MIN2(state->sampler_view_count, state->sampler_state_count); - unsigned char depth_swizzle[4] = { - UTIL_FORMAT_SWIZZLE_X, - UTIL_FORMAT_SWIZZLE_X, - UTIL_FORMAT_SWIZZLE_X, - UTIL_FORMAT_SWIZZLE_X - }; /* The KIL opcode fix, see below. 
*/ if (!count && !r300->screen->caps.is_r500) @@ -563,14 +557,29 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) /* Assign a texture cache region. */ texstate->format.format1 |= view->texcache_region; - /* If compare mode is disabled, the sampler view swizzles - * are stored in the format. - * Otherwise, swizzles must be applied after the compare mode - * in the fragment shader. */ + /* Depth textures are kinda special. */ if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) { + unsigned char depth_swizzle[4]; + + if (!r300->screen->caps.is_r500 && + util_format_get_blocksizebits(tex->desc.b.b.format) == 32) { + /* X24x8 is sampled as Y16X16 on r3xx-r4xx. + * The depth here is at the Y component. */ + for (j = 0; j < 4; j++) + depth_swizzle[j] = UTIL_FORMAT_SWIZZLE_Y; + } else { + for (j = 0; j < 4; j++) + depth_swizzle[j] = UTIL_FORMAT_SWIZZLE_X; + } + + /* If compare mode is disabled, sampler view swizzles + * are stored in the format. + * Otherwise, the swizzles must be applied after the compare + * mode in the fragment shader. */ if (sampler->state.compare_mode == PIPE_TEX_COMPARE_NONE) { texstate->format.format1 |= - r300_get_swizzle_combined(depth_swizzle, view->swizzle); + r300_get_swizzle_combined(depth_swizzle, + view->swizzle); } else { texstate->format.format1 |= r300_get_swizzle_combined(depth_swizzle, 0); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index f1118dfd7d..fcdca5605e 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -105,7 +105,8 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, * The FORMAT specifies how the texture sampler will treat the texture, and * makes available X, Y, Z, W, ZERO, and ONE for swizzling. 
*/ uint32_t r300_translate_texformat(enum pipe_format format, - const unsigned char *swizzle_view) + const unsigned char *swizzle_view, + boolean is_r500) { uint32_t result = 0; const struct util_format_description *desc; @@ -130,7 +131,10 @@ uint32_t r300_translate_texformat(enum pipe_format format, return R300_TX_FORMAT_X16; case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - return R500_TX_FORMAT_Y8X24; + if (is_r500) + return R500_TX_FORMAT_Y8X24; + else + return R300_TX_FORMAT_Y16X16; default: return ~0; /* Unsupported. */ } @@ -533,7 +537,7 @@ boolean r300_is_zs_format_supported(enum pipe_format format) boolean r300_is_sampler_format_supported(enum pipe_format format) { - return r300_translate_texformat(format, 0) != ~0; + return r300_translate_texformat(format, 0, TRUE) != ~0; } static void r300_texture_setup_immutable_state(struct r300_screen* screen, diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 585036ab3b..a4524320fd 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -35,7 +35,8 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, const unsigned char *swizzle_view); uint32_t r300_translate_texformat(enum pipe_format format, - const unsigned char *swizzle_view); + const unsigned char *swizzle_view, + boolean is_r500); uint32_t r500_tx_format_msb_bit(enum pipe_format format); -- cgit v1.2.3 From 8c26dc2bfeaa2504d6bcc31caa200299d47772b8 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 26 Jul 2010 11:56:12 +1000 Subject: r300g: fix macro substitution problem isn't a problem yet, but have issues in hiz branch. 
Signed-off-by: Dave Airlie --- src/gallium/drivers/r300/r300_cs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 3beb625d43..c194d6a1b0 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -136,8 +136,8 @@ #define WRITE_CS_TABLE(values, count) do { \ CS_DEBUG(assert(cs_count == 0);) \ - memcpy(cs_copy->ptr + cs_copy->cdw, values, count * 4); \ - cs_copy->cdw += count; \ + memcpy(cs_copy->ptr + cs_copy->cdw, (values), (count) * 4); \ + cs_copy->cdw += (count); \ } while (0) #endif /* R300_CS_H */ -- cgit v1.2.3 From d26fb6916931f10e029429ecbf46e86484e7e956 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 26 Jul 2010 14:53:06 +0200 Subject: util: fix mutex leaks in mempool --- src/gallium/auxiliary/util/u_mempool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_mempool.c b/src/gallium/auxiliary/util/u_mempool.c index 84e2a34acc..6b1a72a7f6 100644 --- a/src/gallium/auxiliary/util/u_mempool.c +++ b/src/gallium/auxiliary/util/u_mempool.c @@ -126,7 +126,6 @@ void util_mempool_set_thread_safety(struct util_mempool *pool, pool->threading = threading; if (threading) { - pipe_mutex_init(pool->mutex); pool->malloc = util_mempool_malloc_mt; pool->free = util_mempool_free_mt; } else { @@ -152,6 +151,8 @@ void util_mempool_create(struct util_mempool *pool, make_empty_list(&pool->list); + pipe_mutex_init(pool->mutex); + util_mempool_set_thread_safety(pool, threading); } -- cgit v1.2.3 From a3a42e46965221b8f8249f0f1076fc3544b68d0e Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 26 Jul 2010 14:56:48 +0200 Subject: util: fix another mutex leak in mempool By fixing one, I introduced another. Crap. 
--- src/gallium/auxiliary/util/u_mempool.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_mempool.c b/src/gallium/auxiliary/util/u_mempool.c index 6b1a72a7f6..1f336b39a1 100644 --- a/src/gallium/auxiliary/util/u_mempool.c +++ b/src/gallium/auxiliary/util/u_mempool.c @@ -165,6 +165,5 @@ void util_mempool_destroy(struct util_mempool *pool) FREE(page); } - if (pool->threading) - pipe_mutex_destroy(pool->mutex); + pipe_mutex_destroy(pool->mutex); } -- cgit v1.2.3 From 0bebdc230ff09f191cfa269c2cbcbb257fd2e0fc Mon Sep 17 00:00:00 2001 From: Stephan Schmid Date: Mon, 26 Jul 2010 07:52:12 +0200 Subject: r600g: implement the LIT instruction --- src/gallium/drivers/r600/r600_shader.c | 120 ++++++++++++++++++++++++++++++++- 1 file changed, 119 insertions(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index e865f013f7..e5e6786fd0 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -583,6 +583,124 @@ static int tgsi_slt(struct r600_shader_ctx *ctx) return 0; } +static int tgsi_lit(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + + int r; + + + if (inst->Dst[0].Register.WriteMask & (1 << 0)) + { + /* dst.x, <- 1.0 */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.src[0].sel = 249; /*1.0*/ + alu.src[0].chan = 0; + r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); + if (r) + return r; + if ((inst->Dst[0].Register.WriteMask & 0xe) == 0) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + + if (inst->Dst[0].Register.WriteMask & (1 << 1)) + { + /* dst.y = max(src.x, 0.0) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX; + r = tgsi_src(ctx, &inst->Src[0], 
0, &alu.src[0]); + if (r) + return r; + alu.src[1].sel = 248; /*0.0*/ + alu.src[1].chan = 0; + r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); + if (r) + return r; + if ((inst->Dst[0].Register.WriteMask & 0xa) == 0) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + if (inst->Dst[0].Register.WriteMask & (1 << 3)) + { + /* dst.w, <- 1.0 */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.src[0].sel = 249; + alu.src[0].chan = 0; + r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); + if (r) + return r; + if ((inst->Dst[0].Register.WriteMask & 0x4) == 0) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + if (inst->Dst[0].Register.WriteMask & (1 << 2)) + { + /* dst.z = log(src.y) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED; + r = tgsi_src(ctx, &inst->Src[0], 1, &alu.src[0]); + if (r) + return r; + r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); + if (r) + return r; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + int chan = alu.dst.chan; + int sel = alu.dst.sel; + + /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT; + r = tgsi_src(ctx, &inst->Src[0], 3, &alu.src[0]); + if (r) + return r; + alu.src[1].sel = sel; + alu.src[1].chan = chan; + r = tgsi_src(ctx, &inst->Src[0], 0, &alu.src[2]); + if (r) + return r; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 0; + alu.dst.write = 1; + alu.is_op3 = 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* dst.z = exp(tmp.x) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); + if (r) + return r; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); 
+ if (r) + return r; + } + return 0; +} + static int tgsi_trans(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -735,7 +853,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, - {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans}, {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, -- cgit v1.2.3 From 4830237660be23ff67b2dd538947c285cde4b715 Mon Sep 17 00:00:00 2001 From: nobled Date: Sat, 24 Jul 2010 12:04:29 +0000 Subject: i915g: Fix llvm build Acked-by: Jakob Bornecrantz --- src/gallium/targets/dri-i915/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/targets/dri-i915/Makefile b/src/gallium/targets/dri-i915/Makefile index e88c3c9f66..9c10d71a4a 100644 --- a/src/gallium/targets/dri-i915/Makefile +++ b/src/gallium/targets/dri-i915/Makefile @@ -22,7 +22,7 @@ DRIVER_DEFINES = \ -DGALLIUM_RBUG -DGALLIUM_TRACE -DGALLIUM_GALAHAD -DGALLIUM_SOFTPIPE ifeq ($(MESA_LLVM),1) -DRIVER_DEFINS += -DGALLIUM_LLVMPIPE +DRIVER_DEFINES += -DGALLIUM_LLVMPIPE endif include ../Makefile.dri -- cgit v1.2.3 From 0697d41fce6e7cad4badc54cecdd25f1f312c93f Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Thu, 22 Jul 2010 21:50:27 -0700 Subject: i965g: Enable llvm in dri driver if built --- src/gallium/targets/dri-i965/Makefile | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/targets/dri-i965/Makefile 
b/src/gallium/targets/dri-i965/Makefile index 3679c075b2..4b50d04255 100644 --- a/src/gallium/targets/dri-i965/Makefile +++ b/src/gallium/targets/dri-i965/Makefile @@ -6,10 +6,11 @@ LIBNAME = i965_dri.so PIPE_DRIVERS = \ $(TOP)/src/gallium/state_trackers/dri/drm/libdridrm.a \ $(TOP)/src/gallium/winsys/i965/drm/libi965drm.a \ - $(TOP)/src/gallium/drivers/trace/libtrace.a \ - $(TOP)/src/gallium/drivers/rbug/librbug.a \ $(TOP)/src/gallium/winsys/sw/wrapper/libwsw.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/galahad/libgalahad.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/rbug/librbug.a \ $(TOP)/src/gallium/drivers/i965/libi965.a C_SOURCES = \ @@ -18,7 +19,11 @@ C_SOURCES = \ $(DRIVER_SOURCES) DRIVER_DEFINES = \ - -DGALLIUM_SOFTPIPE -DGALLIUM_RBUG -DGALLIUM_TRACE + -DGALLIUM_RBUG -DGALLIUM_TRACE -DGALLIUM_GALAHAD -DGALLIUM_SOFTPIPE + +ifeq ($(MESA_LLVM),1) +DRIVER_DEFINES += -DGALLIUM_LLVMPIPE +endif include ../Makefile.dri -- cgit v1.2.3 From b1ef3e08634e3c382c5dc10c3000427a8f7a4bfa Mon Sep 17 00:00:00 2001 From: nobled Date: Sat, 24 Jul 2010 12:05:30 +0000 Subject: st/xorg: fix use-after-free Acked-by: Jakob Bornecrantz --- src/gallium/state_trackers/xorg/xorg_driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/state_trackers/xorg/xorg_driver.c b/src/gallium/state_trackers/xorg/xorg_driver.c index e993ccc9bf..e10ff2f950 100644 --- a/src/gallium/state_trackers/xorg/xorg_driver.c +++ b/src/gallium/state_trackers/xorg/xorg_driver.c @@ -472,7 +472,6 @@ drv_pre_init(ScrnInfoPtr pScrn, int flags) max_height = max < max_height ? 
max : max_height; } - drmModeFreeResources(res); xf86CrtcSetSizeRange(pScrn, res->min_width, res->min_height, max_width, max_height); xf86DrvMsg(pScrn->scrnIndex, X_PROBED, @@ -481,6 +480,7 @@ drv_pre_init(ScrnInfoPtr pScrn, int flags) xf86DrvMsg(pScrn->scrnIndex, X_PROBED, "Min height %d, Max Height %d.\n", res->min_height, max_height); + drmModeFreeResources(res); } -- cgit v1.2.3 From 3cef6c42bc0966ee988c0e67935053e8ed93ab5e Mon Sep 17 00:00:00 2001 From: nobled Date: Sat, 24 Jul 2010 12:59:40 +0000 Subject: util: fix CPU detection on OS X s/PIPE_OS_DARWIN/PIPE_OS_APPLE, since there is no PIPE_OS_DARWIN. Acked-by: Vinson Lee --- src/gallium/auxiliary/util/u_cpu_detect.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c index a08241971c..23d33af4e4 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -38,7 +38,7 @@ #include "u_cpu_detect.h" #if defined(PIPE_ARCH_PPC) -#if defined(PIPE_OS_DARWIN) +#if defined(PIPE_OS_APPLE) #include #else #include @@ -132,7 +132,7 @@ win32_sig_handler_sse(EXCEPTION_POINTERS* ep) #endif /* PIPE_ARCH_X86 */ -#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_DARWIN) +#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_APPLE) static jmp_buf __lv_powerpc_jmpbuf; static volatile sig_atomic_t __lv_powerpc_canjump = 0; @@ -153,7 +153,7 @@ sigill_handler(int sig) static void check_os_altivec_support(void) { -#if defined(PIPE_OS_DARWIN) +#if defined(PIPE_OS_APPLE) int sels[2] = {CTL_HW, HW_VECTORUNIT}; int has_vu = 0; int len = sizeof (has_vu); @@ -166,8 +166,8 @@ check_os_altivec_support(void) util_cpu_caps.has_altivec = 1; } } -#else /* !PIPE_OS_DARWIN */ - /* no Darwin, do it the brute-force way */ +#else /* !PIPE_OS_APPLE */ + /* not on Apple/Darwin, do it the brute-force way */ /* this is borrowed from the libmpeg2 library */ signal(SIGILL, 
sigill_handler); if (setjmp(__lv_powerpc_jmpbuf)) { @@ -184,7 +184,7 @@ check_os_altivec_support(void) signal(SIGILL, SIG_DFL); util_cpu_caps.has_altivec = 1; } -#endif /* PIPE_OS_DARWIN */ +#endif /* !PIPE_OS_APPLE */ } #endif /* PIPE_ARCH_PPC */ -- cgit v1.2.3 From c88fc26ac9774e992501fe219caf71b290993fbf Mon Sep 17 00:00:00 2001 From: nobled Date: Tue, 8 Jun 2010 13:00:17 +0000 Subject: st/egl: Fix debug line Acked-by: Jakob Bornecrantz --- src/gallium/state_trackers/egl/common/egl_g3d_image.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_image.c b/src/gallium/state_trackers/egl/common/egl_g3d_image.c index b1fe30a776..1e13cfcf7e 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d_image.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d_image.c @@ -78,7 +78,7 @@ egl_g3d_create_image(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx, gimg = CALLOC_STRUCT(egl_g3d_image); if (!gimg) { - _eglError(EGL_BAD_ALLOC, "eglCreatePbufferSurface"); + _eglError(EGL_BAD_ALLOC, "eglCreateEGLImageKHR"); return NULL; } -- cgit v1.2.3 From b17ee335e3398cd1bbd26f5411e7ee6fb6839286 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 26 Jul 2010 20:48:29 -0600 Subject: util: fix unused function warning on non-x86 --- src/gallium/auxiliary/util/u_cpu_detect.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c index 23d33af4e4..879643463f 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -75,7 +75,10 @@ struct util_cpu_caps util_cpu_caps; +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) static int has_cpuid(void); +#endif + #if defined(PIPE_ARCH_X86) -- cgit v1.2.3 From d2c714627ca490b23639ba4c32603f56b9d993e3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 26 Jul 2010 20:48:46 -0600 Subject: cell: 
added const qualifier --- src/gallium/drivers/cell/ppu/cell_context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 07b6eebc69..dc46e59a2d 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -154,7 +154,7 @@ struct cell_context struct vertex_info vertex_info; /** Mapped constant buffers */ - void *mapped_constants[PIPE_SHADER_TYPES]; + const void *mapped_constants[PIPE_SHADER_TYPES]; PIPE_ALIGN_VAR(16) struct cell_spu_function_info spu_functions; -- cgit v1.2.3 From 2a8021667de68c6ee2723b52a27dd980c1ca5602 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 26 Jul 2010 20:49:09 -0600 Subject: cell: comment-out unneeded padding field --- src/gallium/drivers/cell/common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index bbb112fd33..a8cdde34aa 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -230,7 +230,7 @@ struct cell_command_rasterizer { opcode_t opcode; /**< CELL_CMD_STATE_RASTERIZER */ struct pipe_rasterizer_state rasterizer; - uint32_t pad[1]; + /*uint32_t pad[1];*/ }; -- cgit v1.2.3 From 0315c00f5897eb443254323be2b7b5ca213fbef2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 26 Jul 2010 20:49:35 -0600 Subject: cell: fix segfault when freeing samplers --- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index 03f84d295b..223adda48f 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -281,7 +281,7 @@ cell_set_fragment_sampler_views(struct pipe_context 
*pipe, struct pipe_resource *new_tex = new_view ? new_view->texture : NULL; pipe_sampler_view_reference(&cell->fragment_sampler_views[i], - views[i]); + new_view); pipe_resource_reference((struct pipe_resource **) &cell->texture[i], (struct pipe_resource *) new_tex); -- cgit v1.2.3 From dd0cf2e1f5d45337f7e5870ed957ac70f364ba52 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 26 Jul 2010 20:49:54 -0600 Subject: cell: make functions static --- src/gallium/drivers/cell/ppu/cell_state_vertex.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c index 9510ea9ac2..69152b6cbf 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -36,7 +36,7 @@ #include "draw/draw_context.h" -void * +static void * cell_create_vertex_elements_state(struct pipe_context *pipe, unsigned count, const struct pipe_vertex_element *attribs) @@ -51,7 +51,7 @@ cell_create_vertex_elements_state(struct pipe_context *pipe, return velems; } -void +static void cell_bind_vertex_elements_state(struct pipe_context *pipe, void *velems) { @@ -66,7 +66,7 @@ cell_bind_vertex_elements_state(struct pipe_context *pipe, draw_set_vertex_elements(cell->draw, cell_velems->count, cell_velems->velem); } -void +static void cell_delete_vertex_elements_state(struct pipe_context *pipe, void *velems) { FREE( velems ); -- cgit v1.2.3 From fef5ae949fd71419bb2a25f0944f0b50e60a668a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 26 Jul 2010 20:50:17 -0600 Subject: cell: comment-out unused fields, functions --- src/gallium/drivers/cell/ppu/cell_fence.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/cell/ppu/cell_fence.c b/src/gallium/drivers/cell/ppu/cell_fence.c index 34ca864155..e7c9fc46d9 100644 --- 
a/src/gallium/drivers/cell/ppu/cell_fence.c +++ b/src/gallium/drivers/cell/ppu/cell_fence.c @@ -87,6 +87,7 @@ struct cell_buffer_node }; +#if 0 static void cell_add_buffer_to_list(struct cell_context *cell, struct cell_buffer_list *list, @@ -100,6 +101,7 @@ cell_add_buffer_to_list(struct cell_context *cell, list->head = node; } } +#endif /** @@ -113,7 +115,7 @@ cell_free_fenced_buffers(struct cell_context *cell, struct cell_buffer_list *list) { if (list->head) { - struct pipe_screen *ps = cell->pipe.screen; + /*struct pipe_screen *ps = cell->pipe.screen;*/ struct cell_buffer_node *node; cell_fence_finish(cell, &list->fence); @@ -146,7 +148,7 @@ cell_free_fenced_buffers(struct cell_context *cell, void cell_add_fenced_textures(struct cell_context *cell) { - struct cell_buffer_list *list = &cell->fenced_buffers[cell->cur_batch]; + /*struct cell_buffer_list *list = &cell->fenced_buffers[cell->cur_batch];*/ uint i; for (i = 0; i < cell->num_textures; i++) { -- cgit v1.2.3 From 2a45972fb2ba12a6561e5cba84d167f4c30566d4 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 27 Jul 2010 13:08:01 +0100 Subject: gallivm: Add lp_build_select_bitwise() alternative to lp_build_select() When (mask & a) | (~mask & b) is meant instead of mask ? 
a : b --- src/gallium/auxiliary/gallivm/lp_bld_logic.c | 67 +++++++++++++++++++--------- src/gallium/auxiliary/gallivm/lp_bld_logic.h | 5 +++ 2 files changed, 50 insertions(+), 22 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index 39854e43b1..ab4ddb81c4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -362,10 +362,53 @@ lp_build_cmp(struct lp_build_context *bld, } +/** + * Return (mask & a) | (~mask & b); + */ +LLVMValueRef +lp_build_select_bitwise(struct lp_build_context *bld, + LLVMValueRef mask, + LLVMValueRef a, + LLVMValueRef b) +{ + struct lp_type type = bld->type; + LLVMValueRef res; + + if (a == b) { + return a; + } + + if(type.floating) { + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); + } + + a = LLVMBuildAnd(bld->builder, a, mask, ""); + + /* This often gets translated to PANDN, but sometimes the NOT is + * pre-computed and stored in another constant. The best strategy depends + * on available registers, so it is not a big deal -- hopefully LLVM does + * the right decision attending the rest of the program. + */ + b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); + + res = LLVMBuildOr(bld->builder, a, b, ""); + + if(type.floating) { + LLVMTypeRef vec_type = lp_build_vec_type(type); + res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); + } + + return res; +} + + /** * Return mask ? a : b; * - * mask is a bitwise mask, composed of 0 or ~0 for each element. + * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value + * will yield unpredictable results. 
*/ LLVMValueRef lp_build_select(struct lp_build_context *bld, @@ -424,27 +467,7 @@ lp_build_select(struct lp_build_context *bld, } } else { - if(type.floating) { - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); - } - - a = LLVMBuildAnd(bld->builder, a, mask, ""); - - /* This often gets translated to PANDN, but sometimes the NOT is - * pre-computed and stored in another constant. The best strategy depends - * on available registers, so it is not a big deal -- hopefully LLVM does - * the right decision attending the rest of the program. - */ - b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); - - res = LLVMBuildOr(bld->builder, a, b, ""); - - if(type.floating) { - LLVMTypeRef vec_type = lp_build_vec_type(type); - res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); - } + res = lp_build_select_bitwise(bld, mask, a, b); } return res; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h index 29f9fc3b20..4e7b4c9938 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h @@ -63,6 +63,11 @@ lp_build_cmp(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); +LLVMValueRef +lp_build_select_bitwise(struct lp_build_context *bld, + LLVMValueRef mask, + LLVMValueRef a, + LLVMValueRef b); LLVMValueRef lp_build_select(struct lp_build_context *bld, -- cgit v1.2.3 From 4fbffb7d909f9746fb744e133563c80c66574adb Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 27 Jul 2010 13:09:12 +0100 Subject: llvmpipe: Use lp_build_select_bitwise() where appropriate. Fixes fdo 29269. 
--- src/gallium/drivers/llvmpipe/lp_bld_depth.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index e05bbe5011..99a768afd8 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -258,16 +258,16 @@ lp_build_stencil_op_single(struct lp_build_context *bld, } if (stencil->writemask != stencilMax) { - /* compute res = (res & mask) | (stencilVals & ~mask) */ - LLVMValueRef mask = lp_build_const_int_vec(type, stencil->writemask); - LLVMValueRef cmask = LLVMBuildNot(bld->builder, mask, "notWritemask"); - LLVMValueRef t1 = LLVMBuildAnd(bld->builder, res, mask, "t1"); - LLVMValueRef t2 = LLVMBuildAnd(bld->builder, stencilVals, cmask, "t2"); - res = LLVMBuildOr(bld->builder, t1, t2, "t1_or_t2"); + /* mask &= stencil->writemask */ + LLVMValueRef writemask = lp_build_const_int_vec(type, stencil->writemask); + mask = LLVMBuildAnd(bld->builder, mask, writemask, ""); + /* res = (res & mask) | (stencilVals & ~mask) */ + res = lp_build_select_bitwise(bld, writemask, res, stencilVals); + } + else { + /* res = mask ? res : stencilVals */ + res = lp_build_select(bld, mask, res, stencilVals); } - - /* only the update the vector elements enabled by 'mask' */ - res = lp_build_select(bld, mask, res, stencilVals); return res; } @@ -662,9 +662,9 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, } /* Mix the old and new Z buffer values. - * z_dst[i] = zselectmask[i] ? 
z_src[i] : z_dst[i] + * z_dst[i] = (zselectmask[i] & z_src[i]) | (~zselectmask[i] & z_dst[i]) */ - z_dst = lp_build_select(&bld, zselectmask, z_src, z_dst); + z_dst = lp_build_select_bitwise(&bld, zselectmask, z_src, z_dst); } if (stencil[0].enabled) { -- cgit v1.2.3 From 2b3b76a4a0e21eb4bd4f1a4da5ff6ed26ccbabd1 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Tue, 27 Jul 2010 13:48:49 -0400 Subject: r600g: always emit literal after emitting an alu instruction Make sure we always fill in the literal after an alu instruction. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_shader.c | 61 ++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 29 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index e5e6786fd0..dcedf56c11 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -34,6 +34,30 @@ #include #include + +struct r600_shader_tgsi_instruction; + +struct r600_shader_ctx { + struct tgsi_shader_info info; + struct tgsi_parse_context parse; + const struct tgsi_token *tokens; + unsigned type; + unsigned file_offset[TGSI_FILE_COUNT]; + unsigned temp_reg; + struct r600_shader_tgsi_instruction *inst_info; + struct r600_bc *bc; + struct r600_shader *shader; + u32 value[4]; +}; + +struct r600_shader_tgsi_instruction { + unsigned tgsi_opcode; + unsigned is_op3; + unsigned r600_opcode; + int (*process)(struct r600_shader_ctx *ctx); +}; + +static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]; static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader) @@ -216,29 +240,6 @@ int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *r return r600_pipe_shader(ctx, rpshader); } -struct r600_shader_tgsi_instruction; - -struct r600_shader_ctx { - struct tgsi_shader_info 
info; - struct tgsi_parse_context parse; - const struct tgsi_token *tokens; - unsigned type; - unsigned file_offset[TGSI_FILE_COUNT]; - unsigned temp_reg; - struct r600_shader_tgsi_instruction *inst_info; - struct r600_bc *bc; - struct r600_shader *shader; -}; - -struct r600_shader_tgsi_instruction { - unsigned tgsi_opcode; - unsigned is_op3; - unsigned r600_opcode; - int (*process)(struct r600_shader_ctx *ctx); -}; - -static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]; - static int tgsi_is_supported(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; @@ -334,7 +335,6 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s struct r600_bc_output output; unsigned opcode; int i, r = 0, pos0; - u32 value[4]; ctx.bc = &shader->bc; ctx.shader = shader; @@ -380,10 +380,10 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s switch (ctx.parse.FullToken.Token.Type) { case TGSI_TOKEN_TYPE_IMMEDIATE: immediate = &ctx.parse.FullToken.FullImmediate; - value[0] = immediate->u[0].Uint; - value[1] = immediate->u[1].Uint; - value[2] = immediate->u[2].Uint; - value[3] = immediate->u[3].Uint; + ctx.value[0] = immediate->u[0].Uint; + ctx.value[1] = immediate->u[1].Uint; + ctx.value[2] = immediate->u[2].Uint; + ctx.value[3] = immediate->u[3].Uint; break; case TGSI_TOKEN_TYPE_DECLARATION: r = tgsi_declaration(&ctx); @@ -399,7 +399,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s r = ctx.inst_info->process(&ctx); if (r) goto out_err; - r = r600_bc_add_literal(ctx.bc, value); + r = r600_bc_add_literal(ctx.bc, ctx.value); if (r) goto out_err; break; @@ -557,6 +557,9 @@ static int tgsi_slt(struct r600_shader_ctx *ctx) struct r600_bc_alu alu; int i, r; + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); if 
(!(inst->Dst[0].Register.WriteMask & (1 << i))) { -- cgit v1.2.3 From cc09dc2773a729ab361c58ea5fc70ff070d0b1d2 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Mon, 26 Jul 2010 21:16:05 -0700 Subject: swrastg: Add SWRAST_NO_PRESENT option to not send updates to X server There seems to be a problem with this path and freeglut where the window won't open if SWRAST_NO_PRESENT is set to true. --- src/gallium/state_trackers/dri/sw/drisw.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/state_trackers/dri/sw/drisw.c b/src/gallium/state_trackers/dri/sw/drisw.c index 23e99aa0ad..86297c3f80 100644 --- a/src/gallium/state_trackers/dri/sw/drisw.c +++ b/src/gallium/state_trackers/dri/sw/drisw.c @@ -46,6 +46,8 @@ #include "dri1_helper.h" #include "drisw.h" +DEBUG_GET_ONCE_BOOL_OPTION(swrast_no_present, "SWRAST_NO_PRESENT", FALSE); +static boolean swrast_no_present = FALSE; static INLINE void get_drawable_info(__DRIdrawable *dPriv, int *w, int *h) @@ -95,6 +97,9 @@ drisw_present_texture(__DRIdrawable *dPriv, struct dri_screen *screen = dri_screen(drawable->sPriv); struct pipe_surface *psurf; + if (swrast_no_present) + return; + psurf = dri1_get_pipe_surface(drawable, ptex); if (!psurf) return; @@ -215,7 +220,8 @@ drisw_allocate_textures(struct dri_drawable *drawable, dri_drawable_get_format(drawable, statts[i], &format, &bind); - if (statts[i] != ST_ATTACHMENT_DEPTH_STENCIL) + /* if we don't do any present, no need for display targets */ + if (statts[i] != ST_ATTACHMENT_DEPTH_STENCIL && !swrast_no_present) bind |= PIPE_BIND_DISPLAY_TARGET; if (format == PIPE_FORMAT_NONE) @@ -261,6 +267,8 @@ drisw_init_screen(__DRIscreen * sPriv) screen->update_drawable_info = drisw_update_drawable_info; screen->flush_frontbuffer = drisw_flush_frontbuffer; + swrast_no_present = debug_get_option_swrast_no_present(); + sPriv->private = (void *)screen; sPriv->extensions = drisw_screen_extensions; -- cgit v1.2.3 From 
6a20539580e1b7ae921d497fbf66a8fd545efea4 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Tue, 27 Jul 2010 15:33:45 -0700 Subject: r600g: Move declaration before code. Fixes SCons build. --- src/gallium/drivers/r600/r600_shader.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index dcedf56c11..17e9c14d6b 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -651,6 +651,9 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & (1 << 2)) { + int chan; + int sel; + /* dst.z = log(src.y) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED; @@ -665,8 +668,8 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (r) return r; - int chan = alu.dst.chan; - int sel = alu.dst.sel; + chan = alu.dst.chan; + sel = alu.dst.sel; /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); -- cgit v1.2.3 From 25472942c9640f6c0d252de2f013d04ac2355b1d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 27 Jul 2010 17:40:00 -0600 Subject: llvmpipe: pass face+slice to llvmpipe_unswizzle_cbuf_tile() Cube map faces and 3D texture slices are treated the same in llvmpipe textures. Need to pass the sum of these fields to llvmpipe_unswizzle_cbuf_tile() as we do elsewhere. Fixes piglit fbo-3d test (fd.o bug 29135). 
--- src/gallium/drivers/llvmpipe/lp_rast.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 654f4ea48e..ba7b48328b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -368,14 +368,15 @@ lp_rast_store_linear_color( struct lp_rasterizer_task *task, for (buf = 0; buf < rast->state.nr_cbufs; buf++) { struct pipe_surface *cbuf = scene->fb.cbufs[buf]; - const unsigned face = cbuf->face, level = cbuf->level; + const unsigned face_slice = cbuf->face + cbuf->zslice; + const unsigned level = cbuf->level; struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture); if (!task->color_tiles[buf]) continue; llvmpipe_unswizzle_cbuf_tile(lpt, - face, + face_slice, level, task->x, task->y, task->color_tiles[buf]); -- cgit v1.2.3 From 641c9adb09e8707f659d42be600d16902ebf8895 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Tue, 27 Jul 2010 20:15:17 -0400 Subject: r600g: texture support Add texture mapping support, redbook/texbind works if you comment out glClear and second checkboard. 
Need to fix: - texture overwriting - lod & mip/map handling - unnormalized coordinate handling - texture view with first level > 0 - and many other things Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_context.c | 2 +- src/gallium/drivers/r600/r600_context.h | 12 ++ src/gallium/drivers/r600/r600_draw.c | 11 ++ src/gallium/drivers/r600/r600_shader.c | 78 ++++++++- src/gallium/drivers/r600/r600_state.c | 277 +++++++++++++++++++++++++++++++- src/gallium/drivers/r600/r600_texture.c | 4 + src/gallium/drivers/r600/r600d.h | 235 +++++++++++++++++++++++++++ 7 files changed, 610 insertions(+), 9 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index 05575b5767..3c5195f79e 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -55,7 +55,7 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, */ if (!dc) radeon_ctx_dump_bof(rctx->ctx, "gallium.bof"); -#if 0 +#if 1 radeon_ctx_submit(rctx->ctx); #endif rctx->ctx = radeon_ctx_decref(rctx->ctx); diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index 669aaec0b2..30f33f757e 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -23,6 +23,7 @@ #ifndef R600_CONTEXT_H #define R600_CONTEXT_H +#include #include #include #include @@ -45,6 +46,11 @@ struct r600_pipe_shader { struct radeon_state *state; }; +struct r600_texture_resource { + struct pipe_sampler_view view; + struct radeon_state *state; +}; + struct r600_context { struct pipe_context context; struct r600_screen *screen; @@ -55,6 +61,12 @@ struct r600_context { struct radeon_state *config; struct r600_pipe_shader *ps_shader; struct r600_pipe_shader *vs_shader; + unsigned nps_sampler; + struct radeon_state *ps_sampler[PIPE_MAX_ATTRIBS]; + unsigned nps_view; + unsigned nvs_view; + struct r600_texture_resource 
*ps_view[PIPE_MAX_ATTRIBS]; + struct r600_texture_resource *vs_view[PIPE_MAX_ATTRIBS]; unsigned flat_shade; unsigned nvertex_buffer; struct r600_vertex_elements_state *vertex_elements; diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c index 724fb6c988..e0d624889f 100644 --- a/src/gallium/drivers/r600/r600_draw.c +++ b/src/gallium/drivers/r600/r600_draw.c @@ -125,6 +125,17 @@ static int r600_draw_common(struct r600_draw *draw) if (r) return r; } + /* setup texture sampler & resource */ + for (i = 0 ; i < rctx->nps_sampler; i++) { + r = radeon_draw_set_new(rctx->draw, rctx->ps_sampler[i]); + if (r) + return r; + } + for (i = 0 ; i < rctx->nps_view; i++) { + r = radeon_draw_set_new(rctx->draw, rctx->ps_view[i]->state); + if (r) + return r; + } /* FIXME start need to change winsys */ draw->draw = radeon_state(rscreen->rw, R600_DRAW_TYPE, R600_DRAW); if (draw->draw == NULL) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 17e9c14d6b..9796112775 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -838,12 +838,82 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_tex tex; + struct r600_bc_alu alu; + unsigned src_gpr; + int r; + + src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; + + /* Add perspective divide */ + if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_TXP) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE; + alu.src[0].sel = src_gpr; + alu.src[0].chan = 3; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 3; + alu.last = 1; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 
3; + alu.src[1].sel = src_gpr; + alu.src[1].chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 0; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 3; + alu.src[1].sel = src_gpr; + alu.src[1].chan = 1; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 1; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 3; + alu.src[1].sel = src_gpr; + alu.src[1].chan = 2; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 2; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.src[0].sel = 249; + alu.src[0].chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 3; + alu.last = 1; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + src_gpr = ctx->temp_reg; + } + /* TODO use temp if src_gpr is not a temporary reg (File != TEMPORARY) */ memset(&tex, 0, sizeof(struct r600_bc_tex)); tex.inst = ctx->inst_info->r600_opcode; tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; tex.sampler_id = tex.resource_id; - tex.src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; + tex.src_gpr = src_gpr; tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Src[0].Register.Index; tex.dst_sel_x = 0; tex.dst_sel_y = 1; @@ -853,6 +923,10 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.src_sel_y = 1; tex.src_sel_z = 2; tex.src_sel_w = 3; + tex.coord_type_x = 1; + tex.coord_type_y = 1; + tex.coord_type_z = 1; + tex.coord_type_w = 1; return r600_bc_add_tex(ctx->bc, &tex); } @@ -912,7 +986,7 @@ 
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TEX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TEX, 0, 0x10, tgsi_tex}, {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_TXP, 0, 0x10, tgsi_tex}, {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 84a13e4ef7..6503c3740e 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -187,6 +187,8 @@ static void *r600_create_rs_state(struct pipe_context *ctx, struct radeon_state *rstate; rctx->flat_shade = state->flatshade; + rctx->flat_shade = 0; +R600_ERR("flat shade with texture broke tex coord interp\n"); rstate = radeon_state(rscreen->rw, R600_RASTERIZER_TYPE, R600_RASTERIZER); if (rstate == NULL) return NULL; @@ -224,43 +226,306 @@ static void r600_bind_rs_state(struct pipe_context *ctx, void *state) radeon_draw_set(rctx->draw, state); } +static inline unsigned r600_tex_wrap(unsigned wrap) +{ + switch (wrap) { + default: + case PIPE_TEX_WRAP_REPEAT: + return V_03C000_SQ_TEX_WRAP; + case PIPE_TEX_WRAP_CLAMP: + return V_03C000_SQ_TEX_CLAMP_LAST_TEXEL; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return V_03C000_SQ_TEX_CLAMP_HALF_BORDER; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return V_03C000_SQ_TEX_CLAMP_BORDER; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return V_03C000_SQ_TEX_MIRROR; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + return V_03C000_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return V_03C000_SQ_TEX_MIRROR_ONCE_HALF_BORDER; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return 
V_03C000_SQ_TEX_MIRROR_ONCE_BORDER; + } +} + +static inline unsigned r600_tex_filter(unsigned filter) +{ + switch (filter) { + default: + case PIPE_TEX_FILTER_NEAREST: + return V_03C000_SQ_TEX_XY_FILTER_POINT; + case PIPE_TEX_FILTER_LINEAR: + return V_03C000_SQ_TEX_XY_FILTER_BILINEAR; + } +} + +static inline unsigned r600_tex_mipfilter(unsigned filter) +{ + switch (filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + return V_03C000_SQ_TEX_Z_FILTER_POINT; + case PIPE_TEX_MIPFILTER_LINEAR: + return V_03C000_SQ_TEX_Z_FILTER_LINEAR; + default: + case PIPE_TEX_MIPFILTER_NONE: + return V_03C000_SQ_TEX_Z_FILTER_NONE; + } +} + +static inline unsigned r600_tex_compare(unsigned compare) +{ + switch (compare) { + default: + case PIPE_FUNC_NEVER: + return V_03C000_SQ_TEX_DEPTH_COMPARE_NEVER; + case PIPE_FUNC_LESS: + return V_03C000_SQ_TEX_DEPTH_COMPARE_LESS; + case PIPE_FUNC_EQUAL: + return V_03C000_SQ_TEX_DEPTH_COMPARE_EQUAL; + case PIPE_FUNC_LEQUAL: + return V_03C000_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; + case PIPE_FUNC_GREATER: + return V_03C000_SQ_TEX_DEPTH_COMPARE_GREATER; + case PIPE_FUNC_NOTEQUAL: + return V_03C000_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; + case PIPE_FUNC_GEQUAL: + return V_03C000_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; + case PIPE_FUNC_ALWAYS: + return V_03C000_SQ_TEX_DEPTH_COMPARE_ALWAYS; + } +} + static void *r600_create_sampler_state(struct pipe_context *ctx, const struct pipe_sampler_state *state) { - return NULL; + struct r600_screen *rscreen = r600_screen(ctx->screen); + struct radeon_state *rstate; + + rstate = radeon_state(rscreen->rw, R600_PS_SAMPLER_TYPE, R600_PS_SAMPLER); + if (rstate == NULL) + return NULL; + rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0] = + S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | + S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | + S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) | + S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter)) | + S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter)) | + 
S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | + S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)); + /* FIXME LOD it depends on texture base level ... */ + rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0] = + S_03C004_MIN_LOD(0) | + S_03C004_MAX_LOD(0) | + S_03C004_LOD_BIAS(0); + rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0] = S_03C008_TYPE(1); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } static void r600_bind_sampler_states(struct pipe_context *ctx, unsigned count, void **states) { + struct r600_context *rctx = r600_context(ctx); + unsigned i; + + /* FIXME split VS/PS/GS sampler */ + for (i = 0; i < count; i++) { + rctx->ps_sampler[i] = radeon_state_decref(rctx->ps_sampler[i]); + } + rctx->nps_sampler = count; + for (i = 0; i < count; i++) { + rctx->ps_sampler[i] = radeon_state_incref(states[i]); + rctx->ps_sampler[i]->id = R600_PS_SAMPLER + i; + } +} + +static inline unsigned r600_tex_swizzle(unsigned swizzle) +{ + switch (swizzle) { + case PIPE_SWIZZLE_RED: + return V_038010_SQ_SEL_X; + case PIPE_SWIZZLE_GREEN: + return V_038010_SQ_SEL_Y; + case PIPE_SWIZZLE_BLUE: + return V_038010_SQ_SEL_Z; + case PIPE_SWIZZLE_ALPHA: + return V_038010_SQ_SEL_W; + case PIPE_SWIZZLE_ZERO: + return V_038010_SQ_SEL_0; + default: + case PIPE_SWIZZLE_ONE: + return V_038010_SQ_SEL_1; + } +} + +static inline unsigned r600_format_type(unsigned format_type) +{ + switch (format_type) { + default: + case UTIL_FORMAT_TYPE_UNSIGNED: + return V_038010_SQ_FORMAT_COMP_UNSIGNED; + case UTIL_FORMAT_TYPE_SIGNED: + return V_038010_SQ_FORMAT_COMP_SIGNED; + case UTIL_FORMAT_TYPE_FIXED: + return V_038010_SQ_FORMAT_COMP_UNSIGNED_BIASED; + } +} + +static inline unsigned r600_tex_dim(unsigned dim) +{ + switch (dim) { + default: + case PIPE_TEXTURE_1D: + return V_038000_SQ_TEX_DIM_1D; + case PIPE_TEXTURE_2D: + return V_038000_SQ_TEX_DIM_2D; + case PIPE_TEXTURE_3D: + return 
V_038000_SQ_TEX_DIM_3D; + case PIPE_TEXTURE_CUBE: + return V_038000_SQ_TEX_DIM_CUBEMAP; + } } static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *ctx, struct pipe_resource *texture, - const struct pipe_sampler_view *templ) + const struct pipe_sampler_view *view) { - struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view); + struct r600_screen *rscreen = r600_screen(ctx->screen); + struct r600_texture_resource *rtexture; + const struct util_format_description *desc; + struct r600_texture *tmp; + struct r600_buffer *rbuffer; + unsigned format; - *view = *templ; - return view; + if (r600_conv_pipe_format(texture->format, &format)) + return NULL; + rtexture = CALLOC_STRUCT(r600_texture_resource); + if (rtexture == NULL) + return NULL; + desc = util_format_description(texture->format); + assert(desc == NULL); + rtexture->state = radeon_state(rscreen->rw, R600_PS_RESOURCE_TYPE, R600_PS_RESOURCE); + if (rtexture->state == NULL) { + FREE(rtexture); + return NULL; + } + rtexture->view = *view; + rtexture->view.reference.count = 1; + rtexture->view.texture = NULL; + pipe_resource_reference(&rtexture->view.texture, texture); + rtexture->view.context = ctx; + + tmp = (struct r600_texture*)texture; + rbuffer = (struct r600_buffer*)tmp->buffer; + rtexture->state->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + rtexture->state->bo[1] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + rtexture->state->nbo = 2; + rtexture->state->placement[0] = RADEON_GEM_DOMAIN_GTT; + rtexture->state->placement[1] = RADEON_GEM_DOMAIN_GTT; + rtexture->state->placement[2] = RADEON_GEM_DOMAIN_GTT; + rtexture->state->placement[3] = RADEON_GEM_DOMAIN_GTT; + + /* FIXME properly handle first level != 0 */ + rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = + S_038000_DIM(r600_tex_dim(texture->target)) | + S_038000_PITCH((tmp->pitch[0] / 8) - 1) | + S_038000_TEX_WIDTH(texture->width0 - 1); + rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = + 
S_038004_TEX_HEIGHT(texture->height0 - 1) | + S_038004_TEX_DEPTH(texture->depth0 - 1) | + S_038004_DATA_FORMAT(format); + rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = 0; + rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = tmp->offset[1] >> 8; + rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD4] = + S_038010_FORMAT_COMP_X(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) | + S_038010_FORMAT_COMP_Y(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) | + S_038010_FORMAT_COMP_Z(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) | + S_038010_FORMAT_COMP_W(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) | + S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) | + S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) | + S_038010_REQUEST_SIZE(1) | + S_038010_DST_SEL_X(r600_tex_swizzle(view->swizzle_r)) | + S_038010_DST_SEL_Y(r600_tex_swizzle(view->swizzle_g)) | + S_038010_DST_SEL_Z(r600_tex_swizzle(view->swizzle_b)) | + S_038010_DST_SEL_W(r600_tex_swizzle(view->swizzle_a)) | + S_038010_BASE_LEVEL(view->first_level); + rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD5] = + S_038014_LAST_LEVEL(view->last_level) | + S_038014_BASE_ARRAY(0) | + S_038014_LAST_ARRAY(0); + rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = + S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE); + return &rtexture->view; } static void r600_sampler_view_destroy(struct pipe_context *ctx, struct pipe_sampler_view *view) { - FREE(view); + struct r600_texture_resource *texture; + + if (view == NULL) + return; + texture = LIST_ENTRY(struct r600_texture_resource, view, view); + radeon_state_decref(texture->state); + FREE(texture); } static void r600_set_fragment_sampler_views(struct pipe_context *ctx, unsigned count, struct pipe_sampler_view **views) { + struct r600_texture_resource *rtexture; + struct r600_context *rctx = r600_context(ctx); + struct pipe_sampler_view *tmp; + unsigned i; + + if (views == NULL) + return; + + for (i = 0; i < rctx->nps_view; i++) { + tmp = 
&rctx->ps_view[i]->view; + pipe_sampler_view_reference(&tmp, NULL); + rctx->ps_view[i] = NULL; + } + rctx->nps_view = count; + for (i = 0; i < count; i++) { + rtexture = LIST_ENTRY(struct r600_texture_resource, views[i], view); + rctx->ps_view[i] = rtexture; + tmp = NULL; + pipe_sampler_view_reference(&tmp, views[i]); + rtexture->state->id = R600_PS_RESOURCE + i; + } } static void r600_set_vertex_sampler_views(struct pipe_context *ctx, unsigned count, struct pipe_sampler_view **views) { + struct r600_texture_resource *rtexture; + struct r600_context *rctx = r600_context(ctx); + struct pipe_sampler_view *tmp; + unsigned i; + + if (views == NULL) + return; + + for (i = 0; i < rctx->nvs_view; i++) { + tmp = &rctx->vs_view[i]->view; + pipe_sampler_view_reference(&tmp, NULL); + rctx->vs_view[i] = NULL; + } + rctx->nps_view = count; + for (i = 0; i < count; i++) { + rtexture = LIST_ENTRY(struct r600_texture_resource, views[i], view); + rctx->vs_view[i] = rtexture; + tmp = NULL; + pipe_sampler_view_reference(&tmp, views[i]); + rtexture->state->id = R600_VS_RESOURCE + i; + } } static void r600_set_scissor_state(struct pipe_context *ctx, diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 903cfad80a..1c219a5579 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -31,7 +31,9 @@ #include #include "state_tracker/drm_driver.h" #include "r600_screen.h" +#include "r600_context.h" #include "r600_texture.h" +#include "r600d.h" extern struct u_resource_vtbl r600_texture_vtbl; @@ -69,6 +71,8 @@ static void r600_setup_miptree(struct r600_screen *rscreen, struct r600_texture rtex->offset[i] = offset; rtex->layer_size[i] = layer_size; rtex->pitch[i] = stride / util_format_get_blocksize(ptex->format); + rtex->pitch[i] += R600_TEXEL_PITCH_ALIGNMENT_MASK; + rtex->pitch[i] &= ~R600_TEXEL_PITCH_ALIGNMENT_MASK; rtex->stride[i] = stride; offset += align(size, 32); } diff --git 
a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 44834984c6..593b95c9c7 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -26,6 +26,8 @@ #ifndef R600D_H #define R600D_H +#define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7 + #define PKT3_NOP 0x10 #define PKT3_INDIRECT_BUFFER_END 0x17 #define PKT3_SET_PREDICATION 0x20 @@ -574,6 +576,132 @@ #define S_0287F0_USE_OPAQUE(x) (((x) & 0x1) << 6) #define G_0287F0_USE_OPAQUE(x) (((x) >> 6) & 0x1) #define C_0287F0_USE_OPAQUE 0xFFFFFFBF +#define R_038000_SQ_TEX_RESOURCE_WORD0_0 0x038000 +#define S_038000_DIM(x) (((x) & 0x7) << 0) +#define G_038000_DIM(x) (((x) >> 0) & 0x7) +#define C_038000_DIM 0xFFFFFFF8 +#define V_038000_SQ_TEX_DIM_1D 0x00000000 +#define V_038000_SQ_TEX_DIM_2D 0x00000001 +#define V_038000_SQ_TEX_DIM_3D 0x00000002 +#define V_038000_SQ_TEX_DIM_CUBEMAP 0x00000003 +#define V_038000_SQ_TEX_DIM_1D_ARRAY 0x00000004 +#define V_038000_SQ_TEX_DIM_2D_ARRAY 0x00000005 +#define V_038000_SQ_TEX_DIM_2D_MSAA 0x00000006 +#define V_038000_SQ_TEX_DIM_2D_ARRAY_MSAA 0x00000007 +#define S_038000_TILE_MODE(x) (((x) & 0xF) << 3) +#define G_038000_TILE_MODE(x) (((x) >> 3) & 0xF) +#define C_038000_TILE_MODE 0xFFFFFF87 +#define S_038000_TILE_TYPE(x) (((x) & 0x1) << 7) +#define G_038000_TILE_TYPE(x) (((x) >> 7) & 0x1) +#define C_038000_TILE_TYPE 0xFFFFFF7F +#define S_038000_PITCH(x) (((x) & 0x7FF) << 8) +#define G_038000_PITCH(x) (((x) >> 8) & 0x7FF) +#define C_038000_PITCH 0xFFF800FF +#define S_038000_TEX_WIDTH(x) (((x) & 0x1FFF) << 19) +#define G_038000_TEX_WIDTH(x) (((x) >> 19) & 0x1FFF) +#define C_038000_TEX_WIDTH 0x0007FFFF +#define R_038004_SQ_TEX_RESOURCE_WORD1_0 0x038004 +#define S_038004_TEX_HEIGHT(x) (((x) & 0x1FFF) << 0) +#define G_038004_TEX_HEIGHT(x) (((x) >> 0) & 0x1FFF) +#define C_038004_TEX_HEIGHT 0xFFFFE000 +#define S_038004_TEX_DEPTH(x) (((x) & 0x1FFF) << 13) +#define G_038004_TEX_DEPTH(x) (((x) >> 13) & 0x1FFF) +#define C_038004_TEX_DEPTH 0xFC001FFF +#define 
S_038004_DATA_FORMAT(x) (((x) & 0x3F) << 26) +#define G_038004_DATA_FORMAT(x) (((x) >> 26) & 0x3F) +#define C_038004_DATA_FORMAT 0x03FFFFFF +#define R_038008_SQ_TEX_RESOURCE_WORD2_0 0x038008 +#define S_038008_BASE_ADDRESS(x) (((x) & 0xFFFFFFFF) << 0) +#define G_038008_BASE_ADDRESS(x) (((x) >> 0) & 0xFFFFFFFF) +#define C_038008_BASE_ADDRESS 0x00000000 +#define R_03800C_SQ_TEX_RESOURCE_WORD3_0 0x03800C +#define S_03800C_MIP_ADDRESS(x) (((x) & 0xFFFFFFFF) << 0) +#define G_03800C_MIP_ADDRESS(x) (((x) >> 0) & 0xFFFFFFFF) +#define C_03800C_MIP_ADDRESS 0x00000000 +#define R_038010_SQ_TEX_RESOURCE_WORD4_0 0x038010 +#define S_038010_FORMAT_COMP_X(x) (((x) & 0x3) << 0) +#define G_038010_FORMAT_COMP_X(x) (((x) >> 0) & 0x3) +#define C_038010_FORMAT_COMP_X 0xFFFFFFFC +#define V_038010_SQ_FORMAT_COMP_UNSIGNED 0x00000000 +#define V_038010_SQ_FORMAT_COMP_SIGNED 0x00000001 +#define V_038010_SQ_FORMAT_COMP_UNSIGNED_BIASED 0x00000002 +#define S_038010_FORMAT_COMP_Y(x) (((x) & 0x3) << 2) +#define G_038010_FORMAT_COMP_Y(x) (((x) >> 2) & 0x3) +#define C_038010_FORMAT_COMP_Y 0xFFFFFFF3 +#define S_038010_FORMAT_COMP_Z(x) (((x) & 0x3) << 4) +#define G_038010_FORMAT_COMP_Z(x) (((x) >> 4) & 0x3) +#define C_038010_FORMAT_COMP_Z 0xFFFFFFCF +#define S_038010_FORMAT_COMP_W(x) (((x) & 0x3) << 6) +#define G_038010_FORMAT_COMP_W(x) (((x) >> 6) & 0x3) +#define C_038010_FORMAT_COMP_W 0xFFFFFF3F +#define S_038010_NUM_FORMAT_ALL(x) (((x) & 0x3) << 8) +#define G_038010_NUM_FORMAT_ALL(x) (((x) >> 8) & 0x3) +#define C_038010_NUM_FORMAT_ALL 0xFFFFFCFF +#define V_038010_SQ_NUM_FORMAT_NORM 0x00000000 +#define V_038010_SQ_NUM_FORMAT_INT 0x00000001 +#define V_038010_SQ_NUM_FORMAT_SCALED 0x00000002 +#define S_038010_SRF_MODE_ALL(x) (((x) & 0x1) << 10) +#define G_038010_SRF_MODE_ALL(x) (((x) >> 10) & 0x1) +#define C_038010_SRF_MODE_ALL 0xFFFFFBFF +#define V_038010_SFR_MODE_ZERO_CLAMP_MINUS_ONE 0x00000000 +#define V_038010_SFR_MODE_NO_ZERO 0x00000001 +#define S_038010_FORCE_DEGAMMA(x) (((x) & 0x1) << 11) +#define 
G_038010_FORCE_DEGAMMA(x) (((x) >> 11) & 0x1) +#define C_038010_FORCE_DEGAMMA 0xFFFFF7FF +#define S_038010_ENDIAN_SWAP(x) (((x) & 0x3) << 12) +#define G_038010_ENDIAN_SWAP(x) (((x) >> 12) & 0x3) +#define C_038010_ENDIAN_SWAP 0xFFFFCFFF +#define S_038010_REQUEST_SIZE(x) (((x) & 0x3) << 14) +#define G_038010_REQUEST_SIZE(x) (((x) >> 14) & 0x3) +#define C_038010_REQUEST_SIZE 0xFFFF3FFF +#define S_038010_DST_SEL_X(x) (((x) & 0x7) << 16) +#define G_038010_DST_SEL_X(x) (((x) >> 16) & 0x7) +#define C_038010_DST_SEL_X 0xFFF8FFFF +#define V_038010_SQ_SEL_X 0x00000000 +#define V_038010_SQ_SEL_Y 0x00000001 +#define V_038010_SQ_SEL_Z 0x00000002 +#define V_038010_SQ_SEL_W 0x00000003 +#define V_038010_SQ_SEL_0 0x00000004 +#define V_038010_SQ_SEL_1 0x00000005 +#define S_038010_DST_SEL_Y(x) (((x) & 0x7) << 19) +#define G_038010_DST_SEL_Y(x) (((x) >> 19) & 0x7) +#define C_038010_DST_SEL_Y 0xFFC7FFFF +#define S_038010_DST_SEL_Z(x) (((x) & 0x7) << 22) +#define G_038010_DST_SEL_Z(x) (((x) >> 22) & 0x7) +#define C_038010_DST_SEL_Z 0xFE3FFFFF +#define S_038010_DST_SEL_W(x) (((x) & 0x7) << 25) +#define G_038010_DST_SEL_W(x) (((x) >> 25) & 0x7) +#define C_038010_DST_SEL_W 0xF1FFFFFF +#define S_038010_BASE_LEVEL(x) (((x) & 0xF) << 28) +#define G_038010_BASE_LEVEL(x) (((x) >> 28) & 0xF) +#define C_038010_BASE_LEVEL 0x0FFFFFFF +#define R_038014_SQ_TEX_RESOURCE_WORD5_0 0x038014 +#define S_038014_LAST_LEVEL(x) (((x) & 0xF) << 0) +#define G_038014_LAST_LEVEL(x) (((x) >> 0) & 0xF) +#define C_038014_LAST_LEVEL 0xFFFFFFF0 +#define S_038014_BASE_ARRAY(x) (((x) & 0x1FFF) << 4) +#define G_038014_BASE_ARRAY(x) (((x) >> 4) & 0x1FFF) +#define C_038014_BASE_ARRAY 0xFFFE000F +#define S_038014_LAST_ARRAY(x) (((x) & 0x1FFF) << 17) +#define G_038014_LAST_ARRAY(x) (((x) >> 17) & 0x1FFF) +#define C_038014_LAST_ARRAY 0xC001FFFF +#define R_038018_SQ_TEX_RESOURCE_WORD6_0 0x038018 +#define S_038018_MPEG_CLAMP(x) (((x) & 0x3) << 0) +#define G_038018_MPEG_CLAMP(x) (((x) >> 0) & 0x3) +#define C_038018_MPEG_CLAMP 
0xFFFFFFFC +#define S_038018_PERF_MODULATION(x) (((x) & 0x7) << 5) +#define G_038018_PERF_MODULATION(x) (((x) >> 5) & 0x7) +#define C_038018_PERF_MODULATION 0xFFFFFF1F +#define S_038018_INTERLACED(x) (((x) & 0x1) << 8) +#define G_038018_INTERLACED(x) (((x) >> 8) & 0x1) +#define C_038018_INTERLACED 0xFFFFFEFF +#define S_038018_TYPE(x) (((x) & 0x3) << 30) +#define G_038018_TYPE(x) (((x) >> 30) & 0x3) +#define C_038018_TYPE 0x3FFFFFFF +#define V_038010_SQ_TEX_VTX_INVALID_TEXTURE 0x00000000 +#define V_038010_SQ_TEX_VTX_INVALID_BUFFER 0x00000001 +#define V_038010_SQ_TEX_VTX_VALID_TEXTURE 0x00000002 +#define V_038010_SQ_TEX_VTX_VALID_BUFFER 0x00000003 #define R_038008_SQ_VTX_CONSTANT_WORD2_0 0x038008 #define S_038008_BASE_ADDRESS_HI(x) (((x) & 0xFF) << 0) #define G_038008_BASE_ADDRESS_HI(x) (((x) >> 0) & 0xFF) @@ -633,6 +761,113 @@ #define S_038008_ENDIAN_SWAP(x) (((x) & 0x3) << 30) #define G_038008_ENDIAN_SWAP(x) (((x) >> 30) & 0x3) #define C_038008_ENDIAN_SWAP 0x3FFFFFFF +#define R_03C000_SQ_TEX_SAMPLER_WORD0_0 0x03C000 +#define S_03C000_CLAMP_X(x) (((x) & 0x7) << 0) +#define G_03C000_CLAMP_X(x) (((x) >> 0) & 0x7) +#define C_03C000_CLAMP_X 0xFFFFFFF8 +#define V_03C000_SQ_TEX_WRAP 0x00000000 +#define V_03C000_SQ_TEX_MIRROR 0x00000001 +#define V_03C000_SQ_TEX_CLAMP_LAST_TEXEL 0x00000002 +#define V_03C000_SQ_TEX_MIRROR_ONCE_LAST_TEXEL 0x00000003 +#define V_03C000_SQ_TEX_CLAMP_HALF_BORDER 0x00000004 +#define V_03C000_SQ_TEX_MIRROR_ONCE_HALF_BORDER 0x00000005 +#define V_03C000_SQ_TEX_CLAMP_BORDER 0x00000006 +#define V_03C000_SQ_TEX_MIRROR_ONCE_BORDER 0x00000007 +#define S_03C000_CLAMP_Y(x) (((x) & 0x7) << 3) +#define G_03C000_CLAMP_Y(x) (((x) >> 3) & 0x7) +#define C_03C000_CLAMP_Y 0xFFFFFFC7 +#define S_03C000_CLAMP_Z(x) (((x) & 0x7) << 6) +#define G_03C000_CLAMP_Z(x) (((x) >> 6) & 0x7) +#define C_03C000_CLAMP_Z 0xFFFFFE3F +#define S_03C000_XY_MAG_FILTER(x) (((x) & 0x7) << 9) +#define G_03C000_XY_MAG_FILTER(x) (((x) >> 9) & 0x7) +#define C_03C000_XY_MAG_FILTER 0xFFFFF1FF 
+#define V_03C000_SQ_TEX_XY_FILTER_POINT 0x00000000 +#define V_03C000_SQ_TEX_XY_FILTER_BILINEAR 0x00000001 +#define V_03C000_SQ_TEX_XY_FILTER_BICUBIC 0x00000002 +#define S_03C000_XY_MIN_FILTER(x) (((x) & 0x7) << 12) +#define G_03C000_XY_MIN_FILTER(x) (((x) >> 12) & 0x7) +#define C_03C000_XY_MIN_FILTER 0xFFFF8FFF +#define S_03C000_Z_FILTER(x) (((x) & 0x3) << 15) +#define G_03C000_Z_FILTER(x) (((x) >> 15) & 0x3) +#define C_03C000_Z_FILTER 0xFFFE7FFF +#define V_03C000_SQ_TEX_Z_FILTER_NONE 0x00000000 +#define V_03C000_SQ_TEX_Z_FILTER_POINT 0x00000001 +#define V_03C000_SQ_TEX_Z_FILTER_LINEAR 0x00000002 +#define S_03C000_MIP_FILTER(x) (((x) & 0x3) << 17) +#define G_03C000_MIP_FILTER(x) (((x) >> 17) & 0x3) +#define C_03C000_MIP_FILTER 0xFFF9FFFF +#define S_03C000_BORDER_COLOR_TYPE(x) (((x) & 0x3) << 22) +#define G_03C000_BORDER_COLOR_TYPE(x) (((x) >> 22) & 0x3) +#define C_03C000_BORDER_COLOR_TYPE 0xFF3FFFFF +#define V_03C000_SQ_TEX_BORDER_COLOR_TRANS_BLACK 0x00000000 +#define V_03C000_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK 0x00000001 +#define V_03C000_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE 0x00000002 +#define V_03C000_SQ_TEX_BORDER_COLOR_REGISTER 0x00000003 +#define S_03C000_POINT_SAMPLING_CLAMP(x) (((x) & 0x1) << 24) +#define G_03C000_POINT_SAMPLING_CLAMP(x) (((x) >> 24) & 0x1) +#define C_03C000_POINT_SAMPLING_CLAMP 0xFEFFFFFF +#define S_03C000_TEX_ARRAY_OVERRIDE(x) (((x) & 0x1) << 25) +#define G_03C000_TEX_ARRAY_OVERRIDE(x) (((x) >> 25) & 0x1) +#define C_03C000_TEX_ARRAY_OVERRIDE 0xFDFFFFFF +#define S_03C000_DEPTH_COMPARE_FUNCTION(x) (((x) & 0x7) << 26) +#define G_03C000_DEPTH_COMPARE_FUNCTION(x) (((x) >> 26) & 0x7) +#define C_03C000_DEPTH_COMPARE_FUNCTION 0xE3FFFFFF +#define V_03C000_SQ_TEX_DEPTH_COMPARE_NEVER 0x00000000 +#define V_03C000_SQ_TEX_DEPTH_COMPARE_LESS 0x00000001 +#define V_03C000_SQ_TEX_DEPTH_COMPARE_EQUAL 0x00000002 +#define V_03C000_SQ_TEX_DEPTH_COMPARE_LESSEQUAL 0x00000003 +#define V_03C000_SQ_TEX_DEPTH_COMPARE_GREATER 0x00000004 +#define 
V_03C000_SQ_TEX_DEPTH_COMPARE_NOTEQUAL 0x00000005 +#define V_03C000_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL 0x00000006 +#define V_03C000_SQ_TEX_DEPTH_COMPARE_ALWAYS 0x00000007 +#define S_03C000_CHROMA_KEY(x) (((x) & 0x3) << 29) +#define G_03C000_CHROMA_KEY(x) (((x) >> 29) & 0x3) +#define C_03C000_CHROMA_KEY 0x9FFFFFFF +#define V_03C000_SQ_TEX_CHROMA_KEY_DISABLE 0x00000000 +#define V_03C000_SQ_TEX_CHROMA_KEY_KILL 0x00000001 +#define V_03C000_SQ_TEX_CHROMA_KEY_BLEND 0x00000002 +#define S_03C000_LOD_USES_MINOR_AXIS(x) (((x) & 0x1) << 31) +#define G_03C000_LOD_USES_MINOR_AXIS(x) (((x) >> 31) & 0x1) +#define C_03C000_LOD_USES_MINOR_AXIS 0x7FFFFFFF +#define R_03C004_SQ_TEX_SAMPLER_WORD1_0 0x03C004 +#define S_03C004_MIN_LOD(x) (((x) & 0x3FF) << 0) +#define G_03C004_MIN_LOD(x) (((x) >> 0) & 0x3FF) +#define C_03C004_MIN_LOD 0xFFFFFC00 +#define S_03C004_MAX_LOD(x) (((x) & 0x3FF) << 10) +#define G_03C004_MAX_LOD(x) (((x) >> 10) & 0x3FF) +#define C_03C004_MAX_LOD 0xFFF003FF +#define S_03C004_LOD_BIAS(x) (((x) & 0xFFF) << 20) +#define G_03C004_LOD_BIAS(x) (((x) >> 20) & 0xFFF) +#define C_03C004_LOD_BIAS 0x000FFFFF +#define R_03C008_SQ_TEX_SAMPLER_WORD2_0 0x03C008 +#define S_03C008_LOD_BIAS_SEC(x) (((x) & 0xFFF) << 0) +#define G_03C008_LOD_BIAS_SEC(x) (((x) >> 0) & 0xFFF) +#define C_03C008_LOD_BIAS_SEC 0xFFFFF000 +#define S_03C008_MC_COORD_TRUNCATE(x) (((x) & 0x1) << 12) +#define G_03C008_MC_COORD_TRUNCATE(x) (((x) >> 12) & 0x1) +#define C_03C008_MC_COORD_TRUNCATE 0xFFFFEFFF +#define S_03C008_FORCE_DEGAMMA(x) (((x) & 0x1) << 13) +#define G_03C008_FORCE_DEGAMMA(x) (((x) >> 13) & 0x1) +#define C_03C008_FORCE_DEGAMMA 0xFFFFDFFF +#define S_03C008_HIGH_PRECISION_FILTER(x) (((x) & 0x1) << 14) +#define G_03C008_HIGH_PRECISION_FILTER(x) (((x) >> 14) & 0x1) +#define C_03C008_HIGH_PRECISION_FILTER 0xFFFFBFFF +#define S_03C008_PERF_MIP(x) (((x) & 0x7) << 15) +#define G_03C008_PERF_MIP(x) (((x) >> 15) & 0x7) +#define C_03C008_PERF_MIP 0xFFFC7FFF +#define S_03C008_PERF_Z(x) (((x) & 0x3) << 18) 
+#define G_03C008_PERF_Z(x) (((x) >> 18) & 0x3) +#define C_03C008_PERF_Z 0xFFF3FFFF +#define S_03C008_FETCH_4(x) (((x) & 0x1) << 26) +#define G_03C008_FETCH_4(x) (((x) >> 26) & 0x1) +#define C_03C008_FETCH_4 0xFBFFFFFF +#define S_03C008_SAMPLE_IS_PCF(x) (((x) & 0x1) << 27) +#define G_03C008_SAMPLE_IS_PCF(x) (((x) >> 27) & 0x1) +#define C_03C008_SAMPLE_IS_PCF 0xF7FFFFFF +#define S_03C008_TYPE(x) (((x) & 0x1) << 31) +#define G_03C008_TYPE(x) (((x) >> 31) & 0x1) +#define C_03C008_TYPE 0x7FFFFFFF #define R_008958_VGT_PRIMITIVE_TYPE 0x008958 #define S_008958_PRIM_TYPE(x) (((x) & 0x3F) << 0) #define G_008958_PRIM_TYPE(x) (((x) >> 0) & 0x3F) -- cgit v1.2.3 From b172aebfdf288d3487876f7cb01c62582920d4c2 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Tue, 27 Jul 2010 18:13:47 -0700 Subject: scons: Fix sunos5 build. --- src/gallium/tests/graw/SConscript | 3 +++ src/gallium/tests/unit/SConscript | 3 +++ 2 files changed, 6 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/tests/graw/SConscript b/src/gallium/tests/graw/SConscript index 61121732e3..7e39ec21a4 100644 --- a/src/gallium/tests/graw/SConscript +++ b/src/gallium/tests/graw/SConscript @@ -11,6 +11,9 @@ env = env.Clone() env.Prepend(LIBPATH = [graw.dir]) env.Prepend(LIBS = ['graw'] + gallium) +if platform == 'sunos5': + env.Append(LIBS = ['m']) + progs = [ 'clear', 'tri', diff --git a/src/gallium/tests/unit/SConscript b/src/gallium/tests/unit/SConscript index 8a9f3504c7..a200123f44 100644 --- a/src/gallium/tests/unit/SConscript +++ b/src/gallium/tests/unit/SConscript @@ -4,6 +4,9 @@ env = env.Clone() env.Prepend(LIBS = [gallium]) +if platform == 'sunos5': + env.Append(LIBS = ['m']) + progs = [ 'pipe_barrier_test', 'u_cache_test', -- cgit v1.2.3 From f514ad0a9b9bebde0808eadb1e63ea548260be1e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 28 Jul 2010 11:51:16 +1000 Subject: r600g: use gallium util for float->ui conversion --- src/gallium/drivers/r600/r600_screen.h | 13 ------------- 
src/gallium/drivers/r600/r600_state.c | 12 ++++++------ 2 files changed, 6 insertions(+), 19 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_screen.h b/src/gallium/drivers/r600/r600_screen.h index 0a0286d96b..7a373cd0ef 100644 --- a/src/gallium/drivers/r600/r600_screen.h +++ b/src/gallium/drivers/r600/r600_screen.h @@ -88,17 +88,4 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx, int r600_conv_pipe_format(unsigned pformat, unsigned *format); int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); -union r600_float_to_u32_u { - u32 u; - float f; -}; - -static inline u32 r600_float_to_u32(float f) -{ - union r600_float_to_u32_u c; - - c.f = f; - return c.u; -} - #endif diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 6503c3740e..367a1f9991 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -579,12 +579,12 @@ static void r600_set_viewport_state(struct pipe_context *ctx, return; rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMIN_0] = 0x00000000; rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; - rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = r600_float_to_u32(state->scale[0]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_YSCALE_0] = r600_float_to_u32(state->scale[1]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = r600_float_to_u32(state->scale[2]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = r600_float_to_u32(state->translate[0]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = r600_float_to_u32(state->translate[1]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = r600_float_to_u32(state->translate[2]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui(state->scale[0]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui(state->scale[1]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = fui(state->scale[2]); + 
rstate->states[R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui(state->translate[0]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui(state->translate[1]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = fui(state->translate[2]); rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; if (radeon_state_pm4(rstate)) { radeon_state_decref(rstate); -- cgit v1.2.3 From 0a663bb4dbfd8389fd659184cfa4007b268fb140 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 27 Jul 2010 16:34:57 -0700 Subject: gallium/docs: Fix VMware spelling. --- src/gallium/docs/source/distro.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/docs/source/distro.rst b/src/gallium/docs/source/distro.rst index 6ba5a056f4..e379ad3271 100644 --- a/src/gallium/docs/source/distro.rst +++ b/src/gallium/docs/source/distro.rst @@ -51,10 +51,10 @@ nVidia nv50 Driver for the nVidia nv50 family of GPUs. -VMWare SVGA +VMware SVGA ^^^^^^^^^^^ -Driver for VMWare virtualized guest operating system graphics processing. +Driver for VMware virtualized guest operating system graphics processing. ATI r300 ^^^^^^^^ -- cgit v1.2.3 From 2ab24a6faedb0f9b93055cbf3d52be1120353ee1 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 28 Jul 2010 15:26:14 +1000 Subject: r600g: fix up segfault with variation between views and count. For some reason gallium hands us something with lots of empty views, and we are expected to deal with it, just do what r300g does for this bit. 
--- src/gallium/drivers/r600/r600_state.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 367a1f9991..2fdcdea14e 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -481,18 +481,26 @@ static void r600_set_fragment_sampler_views(struct pipe_context *ctx, struct r600_texture_resource *rtexture; struct r600_context *rctx = r600_context(ctx); struct pipe_sampler_view *tmp; - unsigned i; + unsigned i, real_num_views = 0; if (views == NULL) return; + for (i = 0; i < count; i++) { + if (views[i]) + real_num_views++; + } + for (i = 0; i < rctx->nps_view; i++) { tmp = &rctx->ps_view[i]->view; pipe_sampler_view_reference(&tmp, NULL); rctx->ps_view[i] = NULL; } - rctx->nps_view = count; + rctx->nps_view = real_num_views; for (i = 0; i < count; i++) { + + if (!views[i]) + continue; rtexture = LIST_ENTRY(struct r600_texture_resource, views[i], view); rctx->ps_view[i] = rtexture; tmp = NULL; @@ -508,18 +516,24 @@ static void r600_set_vertex_sampler_views(struct pipe_context *ctx, struct r600_texture_resource *rtexture; struct r600_context *rctx = r600_context(ctx); struct pipe_sampler_view *tmp; - unsigned i; + unsigned i, real_num_views = 0; if (views == NULL) return; + for (i = 0; i < count; i++) { + if (views[i]) + real_num_views++; + } for (i = 0; i < rctx->nvs_view; i++) { tmp = &rctx->vs_view[i]->view; pipe_sampler_view_reference(&tmp, NULL); rctx->vs_view[i] = NULL; } - rctx->nps_view = count; + rctx->nvs_view = real_num_views; for (i = 0; i < count; i++) { + if (!views[i]) + continue; rtexture = LIST_ENTRY(struct r600_texture_resource, views[i], view); rctx->vs_view[i] = rtexture; tmp = NULL; -- cgit v1.2.3 From da30209afdd77199c98694ef64c6eaea557d0918 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 28 Jul 2010 01:07:03 -0700 Subject: draw: Fix VMware spelling. 
--- src/gallium/auxiliary/draw/draw_gs.c | 2 +- src/gallium/auxiliary/draw/draw_gs.h | 2 +- src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index 79a57a67f3..0c590f936b 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2009 VMWare Inc. + * Copyright 2009 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h index 2cb634818c..06f4b822a2 100644 --- a/src/gallium/auxiliary/draw/draw_gs.h +++ b/src/gallium/auxiliary/draw/draw_gs.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2009 VMWare Inc. + * Copyright 2009 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index bc074df8c2..5c9db12086 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2010 VMWare, Inc. + * Copyright 2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a -- cgit v1.2.3 From 8bae190f18dfa91cd60d91d91169503c3addb11f Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 28 Jul 2010 01:09:40 -0700 Subject: gallium/docs: Fix VMware spelling. 
--- src/gallium/docs/source/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/docs/source/conf.py b/src/gallium/docs/source/conf.py index ccc84405c4..99e665234e 100644 --- a/src/gallium/docs/source/conf.py +++ b/src/gallium/docs/source/conf.py @@ -38,7 +38,7 @@ master_doc = 'index' # General information about the project. project = u'Gallium' -copyright = u'2009, VMWare, X.org, Nouveau' +copyright = u'2009, VMware, X.org, Nouveau' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -176,7 +176,7 @@ htmlhelp_basename = 'Galliumdoc' # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ('index', 'Gallium.tex', u'Gallium Documentation', - u'VMWare, X.org, Nouveau', 'manual'), + u'VMware, X.org, Nouveau', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of -- cgit v1.2.3 From 9961a0b92de7afed2afec62dadad08d76d1d3374 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 28 Jul 2010 09:11:14 -0400 Subject: r600g: actualy fix the literal emission Previous patch added literal emission to wrong place, we want to emit literal before emitting a new alu group. 
Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_shader.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 9796112775..a2d641dced 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -557,9 +557,6 @@ static int tgsi_slt(struct r600_shader_ctx *ctx) struct r600_bc_alu alu; int i, r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { @@ -739,6 +736,9 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru struct r600_bc_alu alu; int i, r; + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { -- cgit v1.2.3 From b346c4205dc19c9ffbff48ed8bd89687772a96f8 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 28 Jul 2010 10:35:32 -0400 Subject: r600g: add lrp instruction support Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_shader.c | 83 ++++++++++++++++++++++++++++++++-- 1 file changed, 80 insertions(+), 3 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index a2d641dced..3909c704e7 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -587,10 +587,8 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; - int r; - if (inst->Dst[0].Register.WriteMask & (1 << 0)) { /* dst.x, <- 1.0 */ @@ -930,6 +928,85 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) return r600_bc_add_tex(ctx->bc, &tex); } +static int tgsi_lrp(struct 
r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + unsigned i; + int r; + + /* 1 - src0 */ + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD; + alu.src[0].sel = 249; + alu.src[0].chan = 0; + r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]); + if (r) + return r; + alu.src[1].neg = 1; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + if (i == 3) { + alu.last = 1; + } + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; + + /* (1 - src0) * src2 */ + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = i; + r = tgsi_src(ctx, &inst->Src[2], i, &alu.src[1]); + if (r) + return r; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + if (i == 3) { + alu.last = 1; + } + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; + + /* src0 * src1 + (1 - src0) * src2 */ + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD; + alu.is_op3 = 1; + r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[0]); + if (r) + return r; + r = tgsi_src(ctx, &inst->Src[1], i, &alu.src[1]); + if (r) + return r; + alu.src[2].sel = ctx->temp_reg; + alu.src[2].chan = i; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + if (i == 3) { + alu.last = 1; + } + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return tgsi_helper_copy(ctx, inst); +} + static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, 
tgsi_op2}, @@ -949,7 +1026,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, - {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* gap */ {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, -- cgit v1.2.3 From 742ee7935da60dda974795243d2e0fcf31accb59 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 28 Jul 2010 12:18:19 -0400 Subject: r600g: cleanup resource buffer/texture mess Use a common function, fix the mess it was before. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_buffer.c | 59 +++++------ src/gallium/drivers/r600/r600_context.c | 2 +- src/gallium/drivers/r600/r600_draw.c | 5 +- src/gallium/drivers/r600/r600_resource.c | 54 +++++----- src/gallium/drivers/r600/r600_resource.h | 35 ++++++- src/gallium/drivers/r600/r600_screen.c | 69 +------------ src/gallium/drivers/r600/r600_screen.h | 8 -- src/gallium/drivers/r600/r600_state.c | 28 +++--- src/gallium/drivers/r600/r600_texture.c | 163 +++++++++++++++++++------------ src/gallium/drivers/r600/r600_texture.h | 53 ---------- 10 files changed, 217 insertions(+), 259 deletions(-) delete mode 100644 src/gallium/drivers/r600/r600_texture.h (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index bc6e336ba7..167d117520 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -32,10 +32,11 @@ #include "state_tracker/drm_driver.h" #include "r600_screen.h" #include "r600_context.h" +#include "r600_resource.h" extern struct u_resource_vtbl r600_buffer_vtbl; 
-static u32 r600_domain_from_usage(unsigned usage) +u32 r600_domain_from_usage(unsigned usage) { u32 domain = RADEON_GEM_DOMAIN_GTT; @@ -63,47 +64,47 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ) { struct r600_screen *rscreen = r600_screen(screen); - struct r600_buffer *rbuffer; + struct r600_resource *rbuffer; struct radeon_bo *bo; struct pb_desc desc; /* XXX We probably want a different alignment for buffers and textures. */ unsigned alignment = 4096; - rbuffer = CALLOC_STRUCT(r600_buffer); + rbuffer = CALLOC_STRUCT(r600_resource); if (rbuffer == NULL) return NULL; - rbuffer->b.b = *templ; - pipe_reference_init(&rbuffer->b.b.reference, 1); - rbuffer->b.b.screen = screen; - rbuffer->b.vtbl = &r600_buffer_vtbl; + rbuffer->base.b = *templ; + pipe_reference_init(&rbuffer->base.b.reference, 1); + rbuffer->base.b.screen = screen; + rbuffer->base.vtbl = &r600_buffer_vtbl; - if (rbuffer->b.b.bind & PIPE_BIND_CONSTANT_BUFFER) { + if (rbuffer->base.b.bind & PIPE_BIND_CONSTANT_BUFFER) { desc.alignment = alignment; - desc.usage = rbuffer->b.b.bind; - rbuffer->pb = pb_malloc_buffer_create(rbuffer->b.b.width0, + desc.usage = rbuffer->base.b.bind; + rbuffer->pb = pb_malloc_buffer_create(rbuffer->base.b.width0, &desc); if (rbuffer->pb == NULL) { free(rbuffer); return NULL; } - return &rbuffer->b.b; + return &rbuffer->base.b; } - rbuffer->domain = r600_domain_from_usage(rbuffer->b.b.bind); - bo = radeon_bo(rscreen->rw, 0, rbuffer->b.b.width0, alignment, NULL); + rbuffer->domain = r600_domain_from_usage(rbuffer->base.b.bind); + bo = radeon_bo(rscreen->rw, 0, rbuffer->base.b.width0, alignment, NULL); if (bo == NULL) { FREE(rbuffer); return NULL; } rbuffer->bo = bo; - return &rbuffer->b.b; + return &rbuffer->base.b; } struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, void *ptr, unsigned bytes, unsigned bind) { - struct r600_buffer *rbuffer; + struct r600_resource *rbuffer; struct r600_screen 
*rscreen = r600_screen(screen); struct pipe_resource templ; @@ -116,20 +117,20 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, templ.height0 = 1; templ.depth0 = 1; - rbuffer = (struct r600_buffer*)r600_buffer_create(screen, &templ); + rbuffer = (struct r600_resource*)r600_buffer_create(screen, &templ); if (rbuffer == NULL) { return NULL; } radeon_bo_map(rscreen->rw, rbuffer->bo); memcpy(rbuffer->bo->data, ptr, bytes); radeon_bo_unmap(rscreen->rw, rbuffer->bo); - return &rbuffer->b.b; + return &rbuffer->base.b; } static void r600_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *buf) { - struct r600_buffer *rbuffer = (struct r600_buffer*)buf; + struct r600_resource *rbuffer = (struct r600_resource*)buf; struct r600_screen *rscreen = r600_screen(screen); if (rbuffer->pb) { @@ -146,7 +147,7 @@ static void r600_buffer_destroy(struct pipe_screen *screen, static void *r600_buffer_transfer_map(struct pipe_context *pipe, struct pipe_transfer *transfer) { - struct r600_buffer *rbuffer = (struct r600_buffer*)transfer->resource; + struct r600_resource *rbuffer = (struct r600_resource*)transfer->resource; struct r600_screen *rscreen = r600_screen(pipe->screen); int write = 0; @@ -166,9 +167,9 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe, } static void r600_buffer_transfer_unmap(struct pipe_context *pipe, - struct pipe_transfer *transfer) + struct pipe_transfer *transfer) { - struct r600_buffer *rbuffer = (struct r600_buffer*)transfer->resource; + struct r600_resource *rbuffer = (struct r600_resource*)transfer->resource; struct r600_screen *rscreen = r600_screen(pipe->screen); if (rbuffer->pb) { @@ -188,7 +189,7 @@ unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, struct pipe_resource *buf, unsigned face, unsigned level) { - /* XXX */ + /* FIXME */ return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; } @@ -196,7 +197,7 @@ struct pipe_resource *r600_buffer_from_handle(struct 
pipe_screen *screen, struct winsys_handle *whandle) { struct radeon *rw = (struct radeon*)screen->winsys; - struct r600_buffer *rbuffer; + struct r600_resource *rbuffer; struct radeon_bo *bo = NULL; bo = radeon_bo(rw, whandle->handle, 0, 0, NULL); @@ -204,18 +205,18 @@ struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, return NULL; } - rbuffer = CALLOC_STRUCT(r600_buffer); + rbuffer = CALLOC_STRUCT(r600_resource); if (rbuffer == NULL) { radeon_bo_decref(rw, bo); return NULL; } - pipe_reference_init(&rbuffer->b.b.reference, 1); - rbuffer->b.b.target = PIPE_BUFFER; - rbuffer->b.b.screen = screen; - rbuffer->b.vtbl = &r600_buffer_vtbl; + pipe_reference_init(&rbuffer->base.b.reference, 1); + rbuffer->base.b.target = PIPE_BUFFER; + rbuffer->base.b.screen = screen; + rbuffer->base.vtbl = &r600_buffer_vtbl; rbuffer->bo = bo; - return &rbuffer->b.b; + return &rbuffer->base.b; } struct u_resource_vtbl r600_buffer_vtbl = diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index 3c5195f79e..f2875f4380 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -29,9 +29,9 @@ #include #include #include -#include "r600_resource.h" #include "r600_screen.h" #include "r600_context.h" +#include "r600_resource.h" #include "r600d.h" static void r600_destroy_context(struct pipe_context *context) diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c index e0d624889f..8e9d11b855 100644 --- a/src/gallium/drivers/r600/r600_draw.c +++ b/src/gallium/drivers/r600/r600_draw.c @@ -33,6 +33,7 @@ #include #include "r600_screen.h" #include "r600_context.h" +#include "r600_resource.h" #include "r600d.h" struct r600_draw { @@ -51,7 +52,7 @@ static int r600_draw_common(struct r600_draw *draw) struct r600_context *rctx = r600_context(draw->ctx); struct r600_screen *rscreen = rctx->screen; struct radeon_state *vs_resource; - struct r600_buffer *rbuffer; + 
struct r600_resource *rbuffer; unsigned i, j, offset, format, prim; u32 vgt_dma_index_type, vgt_draw_initiator; int r; @@ -101,7 +102,7 @@ static int r600_draw_common(struct r600_draw *draw) for (i = 0 ; i < rctx->vertex_elements->count; i++) { j = rctx->vertex_elements->elements[i].vertex_buffer_index; - rbuffer = (struct r600_buffer*)rctx->vertex_buffer[j].buffer; + rbuffer = (struct r600_resource*)rctx->vertex_buffer[j].buffer; offset = rctx->vertex_elements->elements[i].src_offset + rctx->vertex_buffer[j].buffer_offset; r = r600_conv_pipe_format(rctx->vertex_elements->elements[i].src_format, &format); if (r) diff --git a/src/gallium/drivers/r600/r600_resource.c b/src/gallium/drivers/r600/r600_resource.c index d9aa1df04f..292c5d294d 100644 --- a/src/gallium/drivers/r600/r600_resource.c +++ b/src/gallium/drivers/r600/r600_resource.c @@ -26,43 +26,43 @@ #include "r600_screen.h" #include "r600_texture.h" -static struct pipe_resource * -r600_resource_create(struct pipe_screen *screen, - const struct pipe_resource *templ) +static struct pipe_resource *r600_resource_create(struct pipe_screen *screen, + const struct pipe_resource *templ) { - if (templ->target == PIPE_BUFFER) - return r600_buffer_create(screen, templ); - else - return r600_texture_create(screen, templ); + if (templ->target == PIPE_BUFFER) { + return r600_buffer_create(screen, templ); + } else { + return r600_texture_create(screen, templ); + } } -static struct pipe_resource * -r600_resource_from_handle(struct pipe_screen * screen, - const struct pipe_resource *templ, - struct winsys_handle *whandle) +static struct pipe_resource *r600_resource_from_handle(struct pipe_screen * screen, + const struct pipe_resource *templ, + struct winsys_handle *whandle) { - if (templ->target == PIPE_BUFFER) - return NULL; - else - return r600_texture_from_handle(screen, templ, whandle); + if (templ->target == PIPE_BUFFER) { + return NULL; + } else { + return r600_texture_from_handle(screen, templ, whandle); + } } void 
r600_init_context_resource_functions(struct r600_context *r600) { - r600->context.get_transfer = u_get_transfer_vtbl; - r600->context.transfer_map = u_transfer_map_vtbl; - r600->context.transfer_flush_region = u_transfer_flush_region_vtbl; - r600->context.transfer_unmap = u_transfer_unmap_vtbl; - r600->context.transfer_destroy = u_transfer_destroy_vtbl; - r600->context.transfer_inline_write = u_transfer_inline_write_vtbl; - r600->context.is_resource_referenced = u_is_resource_referenced_vtbl; + r600->context.get_transfer = u_get_transfer_vtbl; + r600->context.transfer_map = u_transfer_map_vtbl; + r600->context.transfer_flush_region = u_transfer_flush_region_vtbl; + r600->context.transfer_unmap = u_transfer_unmap_vtbl; + r600->context.transfer_destroy = u_transfer_destroy_vtbl; + r600->context.transfer_inline_write = u_transfer_inline_write_vtbl; + r600->context.is_resource_referenced = u_is_resource_referenced_vtbl; } void r600_init_screen_resource_functions(struct r600_screen *r600screen) { - r600screen->screen.resource_create = r600_resource_create; - r600screen->screen.resource_from_handle = r600_resource_from_handle; - r600screen->screen.resource_get_handle = u_resource_get_handle_vtbl; - r600screen->screen.resource_destroy = u_resource_destroy_vtbl; - r600screen->screen.user_buffer_create = r600_user_buffer_create; + r600screen->screen.resource_create = r600_resource_create; + r600screen->screen.resource_from_handle = r600_resource_from_handle; + r600screen->screen.resource_get_handle = u_resource_get_handle_vtbl; + r600screen->screen.resource_destroy = u_resource_destroy_vtbl; + r600screen->screen.user_buffer_create = r600_user_buffer_create; } diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 95084a371b..0139a3b777 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -20,14 +20,47 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR 
THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - #ifndef R600_RESOURCE_H #define R600_RESOURCE_H +#include "util/u_transfer.h" + struct r600_context; struct r600_screen; +/* This gets further specialized into either buffer or texture + * structures. Use the vtbl struct to choose between the two + * underlying implementations. + */ +struct r600_resource { + struct u_resource base; + struct radeon_bo *bo; + u32 domain; + u32 flink; + struct pb_buffer *pb; +}; + +struct r600_resource_texture { + struct r600_resource resource; + unsigned long offset[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long pitch[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long stride[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long layer_size[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long stride_override; + unsigned long size; +}; + void r600_init_context_resource_functions(struct r600_context *r600); void r600_init_screen_resource_functions(struct r600_screen *r600screen); +/* r600_buffer */ +u32 r600_domain_from_usage(unsigned usage); + +/* r600_texture */ +struct pipe_resource *r600_texture_create(struct pipe_screen *screen, + const struct pipe_resource *templ); +struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, + const struct pipe_resource *base, + struct winsys_handle *whandle); + #endif diff --git a/src/gallium/drivers/r600/r600_screen.c b/src/gallium/drivers/r600/r600_screen.c index dec6fa8d27..e0d74ca558 100644 --- a/src/gallium/drivers/r600/r600_screen.c +++ b/src/gallium/drivers/r600/r600_screen.c @@ -24,15 +24,14 @@ * Jerome Glisse * Corbin Simpson */ -#include -#include -#include -#include "r600_resource.h" +#include +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/u_memory.h" #include "r600_screen.h" -#include "r600_texture.h" #include "r600_context.h" #include "r600_public.h" -#include +#include "r600_resource.h" static const char* r600_get_vendor(struct pipe_screen* pscreen) { @@ -180,64 +179,6 @@ static boolean r600_is_format_supported(struct pipe_screen* 
screen, return FALSE; } -struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, - struct pipe_resource *texture, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box) -{ - struct r600_texture *rtex = (struct r600_texture*)texture; - struct r600_transfer *trans; - - trans = CALLOC_STRUCT(r600_transfer); - if (trans == NULL) - return NULL; - pipe_resource_reference(&trans->transfer.resource, texture); - trans->transfer.sr = sr; - trans->transfer.usage = usage; - trans->transfer.box = *box; - trans->transfer.stride = rtex->stride[sr.level]; - trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face); - return &trans->transfer; -} - -void r600_texture_transfer_destroy(struct pipe_context *ctx, - struct pipe_transfer *trans) -{ - pipe_resource_reference(&trans->resource, NULL); - FREE(trans); -} - -void* r600_texture_transfer_map(struct pipe_context *ctx, - struct pipe_transfer* transfer) -{ - struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; - struct r600_texture *rtex = (struct r600_texture*)transfer->resource; - char *map; - enum pipe_format format = rtex->b.b.format; - - map = pipe_buffer_map(ctx, rtex->buffer, - transfer->usage, - &rtransfer->buffer_transfer); - - if (!map) { - return NULL; - } - - return map + rtransfer->offset + - transfer->box.y / util_format_get_blockheight(format) * transfer->stride + - transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); -} - -void r600_texture_transfer_unmap(struct pipe_context *ctx, - struct pipe_transfer* transfer) -{ - struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; - struct r600_texture *rtex = (struct r600_texture*)transfer->resource; - - pipe_buffer_unmap(ctx, rtex->buffer, rtransfer->buffer_transfer); -} - static void r600_destroy_screen(struct pipe_screen* pscreen) { struct r600_screen* rscreen = r600_screen(pscreen); diff --git a/src/gallium/drivers/r600/r600_screen.h 
b/src/gallium/drivers/r600/r600_screen.h index 7a373cd0ef..9a452ecfe3 100644 --- a/src/gallium/drivers/r600/r600_screen.h +++ b/src/gallium/drivers/r600/r600_screen.h @@ -40,14 +40,6 @@ struct r600_transfer { unsigned offset; }; -struct r600_buffer { - struct u_resource b; - struct radeon_bo *bo; - u32 domain; - u32 flink; - struct pb_buffer *pb; -}; - struct r600_screen { struct pipe_screen screen; struct radeon *rw; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 2fdcdea14e..7d67e28c06 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -24,12 +24,12 @@ * Jerome Glisse */ #include -#include -#include -#include +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/u_memory.h" #include "r600_screen.h" -#include "r600_texture.h" #include "r600_context.h" +#include "r600_resource.h" #include "r600d.h" @@ -90,8 +90,8 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, { struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_context *rctx = r600_context(ctx); - struct r600_texture *rtex; - struct r600_buffer *rbuffer; + struct r600_resource_texture *rtex; + struct r600_resource *rbuffer; struct radeon_state *rstate; unsigned level = state->cbufs[0]->level; unsigned pitch, slice; @@ -99,8 +99,8 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, rstate = radeon_state(rscreen->rw, R600_CB0_TYPE, R600_CB0); if (rstate == NULL) return; - rtex = (struct r600_texture*)state->cbufs[0]->texture; - rbuffer = (struct r600_buffer*)rtex->buffer; + rtex = (struct r600_resource_texture*)state->cbufs[0]->texture; + rbuffer = &rtex->resource; rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); rstate->bo[1] = radeon_bo_incref(rscreen->rw, rbuffer->bo); rstate->bo[2] = radeon_bo_incref(rscreen->rw, rbuffer->bo); @@ -125,8 +125,8 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, 
radeon_draw_set_new(rctx->draw, rstate); rctx->db = radeon_state_decref(rctx->db); if(state->zsbuf) { - rtex = (struct r600_texture*)state->zsbuf->texture; - rbuffer = (struct r600_buffer*)rtex->buffer; + rtex = (struct r600_resource_texture*)state->zsbuf->texture; + rbuffer = &rtex->resource; rctx->db = radeon_state(rscreen->rw, R600_DB_TYPE, R600_DB); if(rctx->db == NULL) return; @@ -397,8 +397,8 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_texture_resource *rtexture; const struct util_format_description *desc; - struct r600_texture *tmp; - struct r600_buffer *rbuffer; + struct r600_resource_texture *tmp; + struct r600_resource *rbuffer; unsigned format; if (r600_conv_pipe_format(texture->format, &format)) @@ -419,8 +419,8 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c pipe_resource_reference(&rtexture->view.texture, texture); rtexture->view.context = ctx; - tmp = (struct r600_texture*)texture; - rbuffer = (struct r600_buffer*)tmp->buffer; + tmp = (struct r600_resource_texture*)texture; + rbuffer = &tmp->resource; rtexture->state->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); rtexture->state->bo[1] = radeon_bo_incref(rscreen->rw, rbuffer->bo); rtexture->state->nbo = 2; diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 1c219a5579..ab20e97948 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -32,16 +32,18 @@ #include "state_tracker/drm_driver.h" #include "r600_screen.h" #include "r600_context.h" -#include "r600_texture.h" +#include "r600_resource.h" #include "r600d.h" extern struct u_resource_vtbl r600_texture_vtbl; -unsigned long r600_texture_get_offset(struct r600_texture *rtex, unsigned level, unsigned zslice, unsigned face) +static unsigned long r600_texture_get_offset(struct r600_resource_texture *rtex, + 
unsigned level, unsigned zslice, + unsigned face) { unsigned long offset = rtex->offset[level]; - switch (rtex->b.b.target) { + switch (rtex->resource.base.b.target) { case PIPE_TEXTURE_3D: assert(face == 0); return offset + zslice * rtex->layer_size[level]; @@ -54,9 +56,9 @@ unsigned long r600_texture_get_offset(struct r600_texture *rtex, unsigned level, } } -static void r600_setup_miptree(struct r600_screen *rscreen, struct r600_texture *rtex) +static void r600_setup_miptree(struct r600_screen *rscreen, struct r600_resource_texture *rtex) { - struct pipe_resource *ptex = &rtex->b.b; + struct pipe_resource *ptex = &rtex->resource.base.b; unsigned long w, h, stride, size, layer_size, i, offset; for (i = 0, offset = 0; i <= ptex->last_level; i++) { @@ -80,43 +82,44 @@ static void r600_setup_miptree(struct r600_screen *rscreen, struct r600_texture } struct pipe_resource *r600_texture_create(struct pipe_screen *screen, - const struct pipe_resource *templ) + const struct pipe_resource *templ) { - struct r600_texture *rtex = CALLOC_STRUCT(r600_texture); + struct r600_resource_texture *rtex; + struct r600_resource *resource; struct r600_screen *rscreen = r600_screen(screen); - struct pipe_resource templ_buf; + rtex = CALLOC_STRUCT(r600_resource_texture); if (!rtex) { return NULL; } - rtex->b.b = *templ; - rtex->b.vtbl = &r600_texture_vtbl; - pipe_reference_init(&rtex->b.b.reference, 1); - rtex->b.b.screen = screen; + resource = &rtex->resource; + resource->base.b = *templ; + resource->base.vtbl = &r600_texture_vtbl; + pipe_reference_init(&resource->base.b.reference, 1); + resource->base.b.screen = screen; r600_setup_miptree(rscreen, rtex); - memset(&templ_buf, 0, sizeof(struct pipe_resource)); - templ_buf.target = PIPE_BUFFER; - templ_buf.format = PIPE_FORMAT_R8_UNORM; - templ_buf.usage = templ->usage; - templ_buf.bind = templ->bind; - templ_buf.width0 = rtex->size; - templ_buf.height0 = 1; - templ_buf.depth0 = 1; - - rtex->buffer = screen->resource_create(screen, 
&templ_buf); - if (!rtex->buffer) { + /* FIXME alignment 4096 enought ? too much ? */ + resource->domain = r600_domain_from_usage(resource->base.b.bind); + resource->bo = radeon_bo(rscreen->rw, 0, rtex->size, 4096, NULL); + if (resource->bo == NULL) { FREE(rtex); return NULL; } - return &rtex->b.b; + + return &resource->base.b; } static void r600_texture_destroy(struct pipe_screen *screen, struct pipe_resource *ptex) { - struct r600_texture *rtex = (struct r600_texture*)ptex; + struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; + struct r600_resource *resource = &rtex->resource; + struct r600_screen *rscreen = r600_screen(screen); + if (resource->bo) { + radeon_bo_decref(rscreen->rw, resource->bo); + } FREE(rtex); } @@ -125,7 +128,7 @@ static struct pipe_surface *r600_get_tex_surface(struct pipe_screen *screen, unsigned face, unsigned level, unsigned zslice, unsigned flags) { - struct r600_texture *rtex = (struct r600_texture*)texture; + struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface); unsigned long offset; @@ -153,71 +156,111 @@ static void r600_tex_surface_destroy(struct pipe_surface *surface) } struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, - const struct pipe_resource *base, + const struct pipe_resource *templ, struct winsys_handle *whandle) { - struct pipe_resource *buffer; - struct r600_texture *rtex; + struct radeon *rw = (struct radeon*)screen->winsys; + struct r600_resource_texture *rtex; + struct r600_resource *resource; + struct radeon_bo *bo = NULL; - buffer = r600_buffer_from_handle(screen, whandle); - if (buffer == NULL) { + bo = radeon_bo(rw, whandle->handle, 0, 0, NULL); + if (bo == NULL) { return NULL; } /* Support only 2D textures without mipmaps */ - if (base->target != PIPE_TEXTURE_2D || base->depth0 != 1 || base->last_level != 0) + if (templ->target != PIPE_TEXTURE_2D || templ->depth0 != 1 || 
templ->last_level != 0) return NULL; - rtex = CALLOC_STRUCT(r600_texture); + rtex = CALLOC_STRUCT(r600_resource_texture); if (rtex == NULL) return NULL; - /* one ref already taken */ - rtex->buffer = buffer; - - rtex->b.b = *base; - rtex->b.vtbl = &r600_texture_vtbl; - pipe_reference_init(&rtex->b.b.reference, 1); - rtex->b.b.screen = screen; + resource = &rtex->resource; + resource->base.b = *templ; + resource->base.vtbl = &r600_texture_vtbl; + pipe_reference_init(&resource->base.b.reference, 1); + resource->base.b.screen = screen; + resource->bo = bo; rtex->stride_override = whandle->stride; - rtex->pitch[0] = whandle->stride / util_format_get_blocksize(base->format); + rtex->pitch[0] = whandle->stride / util_format_get_blocksize(templ->format); rtex->stride[0] = whandle->stride; rtex->offset[0] = 0; - rtex->size = align(rtex->stride[0] * base->height0, 32); + rtex->size = align(rtex->stride[0] * templ->height0, 32); - return &rtex->b.b; + return &resource->base.b; } -static boolean r600_texture_get_handle(struct pipe_screen* screen, - struct pipe_resource *texture, - struct winsys_handle *whandle) +static unsigned int r600_texture_is_referenced(struct pipe_context *context, + struct pipe_resource *texture, + unsigned face, unsigned level) { - struct r600_screen *rscreen = r600_screen(screen); - struct r600_texture* rtex = (struct r600_texture*)texture; + /* FIXME */ + return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; +} - if (!rtex) { - return FALSE; - } +struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, + struct pipe_resource *texture, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box) +{ + struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; + struct r600_transfer *trans; - whandle->stride = rtex->stride[0]; + trans = CALLOC_STRUCT(r600_transfer); + if (trans == NULL) + return NULL; + pipe_resource_reference(&trans->transfer.resource, texture); + trans->transfer.sr = sr; + 
trans->transfer.usage = usage; + trans->transfer.box = *box; + trans->transfer.stride = rtex->stride[sr.level]; + trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face); + return &trans->transfer; +} + +void r600_texture_transfer_destroy(struct pipe_context *ctx, + struct pipe_transfer *trans) +{ + pipe_resource_reference(&trans->resource, NULL); + FREE(trans); +} + +void* r600_texture_transfer_map(struct pipe_context *ctx, + struct pipe_transfer* transfer) +{ + struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; + struct r600_resource *resource; + enum pipe_format format = transfer->resource->format; + struct r600_screen *rscreen = r600_screen(ctx->screen); + char *map; - r600_buffer_get_handle(rscreen->rw, rtex->buffer, whandle); + resource = (struct r600_resource *)transfer->resource; + if (radeon_bo_map(rscreen->rw, resource->bo)) { + return NULL; + } + map = resource->bo->data; - return TRUE; + return map + rtransfer->offset + + transfer->box.y / util_format_get_blockheight(format) * transfer->stride + + transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); } -static unsigned int r600_texture_is_referenced(struct pipe_context *context, - struct pipe_resource *texture, - unsigned face, unsigned level) +void r600_texture_transfer_unmap(struct pipe_context *ctx, + struct pipe_transfer* transfer) { - struct r600_texture *rtex = (struct r600_texture*)texture; + struct r600_screen *rscreen = r600_screen(ctx->screen); + struct r600_resource *resource; - return r600_buffer_is_referenced_by_cs(context, rtex->buffer, face, level); + resource = (struct r600_resource *)transfer->resource; + radeon_bo_unmap(rscreen->rw, resource->bo); } struct u_resource_vtbl r600_texture_vtbl = { - r600_texture_get_handle, /* get_handle */ + u_default_resource_get_handle, /* get_handle */ r600_texture_destroy, /* resource_destroy */ r600_texture_is_referenced, /* is_resource_referenced */ r600_texture_get_transfer, /* 
get_transfer */ diff --git a/src/gallium/drivers/r600/r600_texture.h b/src/gallium/drivers/r600/r600_texture.h deleted file mode 100644 index 9bc08d6b04..0000000000 --- a/src/gallium/drivers/r600/r600_texture.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ -#ifndef R600_TEXTURE_H -#define R600_TEXTURE_H - -#include - -struct r600_texture { - struct u_resource b; - unsigned long offset[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long pitch[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long stride[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long layer_size[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long stride_override; - unsigned long size; - struct pipe_resource *buffer; -}; - -struct pipe_resource *r600_texture_create(struct pipe_screen *screen, - const struct pipe_resource *templ); -unsigned long r600_texture_get_offset(struct r600_texture *rtex, unsigned level, unsigned zslice, unsigned face); -struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, - const struct pipe_resource *base, - struct winsys_handle *whandle); -void r600_init_screen_texture_functions(struct pipe_screen *screen); - -/* This should be implemented by winsys. */ -boolean r600_buffer_get_handle(struct radeon *rw, - struct pipe_resource *buf, - struct winsys_handle *whandle); - - -#endif -- cgit v1.2.3 From 6f0f6c64596b7bbbfa96e8af6715565e37efa91e Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 28 Jul 2010 19:33:50 -0400 Subject: r600g: split pipe state creating/binding from hw state creation Split the handling of hw state creation from pipe state creation, because the hw state groups do not exactly match the pipe state groups. For now, be dumb about it and rebuild all hw states on each draw call. More optimization on that side is coming.
Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_blit.c | 48 +- src/gallium/drivers/r600/r600_buffer.c | 1 + src/gallium/drivers/r600/r600_context.c | 152 ++-- src/gallium/drivers/r600/r600_context.h | 147 +++- src/gallium/drivers/r600/r600_draw.c | 34 +- src/gallium/drivers/r600/r600_resource.c | 1 - src/gallium/drivers/r600/r600_shader.c | 47 +- src/gallium/drivers/r600/r600_state.c | 1273 +++++++++++++++++++----------- src/gallium/winsys/r600/drm/r600_drm.c | 4 +- 9 files changed, 1086 insertions(+), 621 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 1dcb19babc..cc37227ead 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -34,31 +34,37 @@ static void r600_blitter_save_states(struct r600_context *rctx) { - util_blitter_save_blend(rctx->blitter, - rctx->draw->state[R600_BLEND]); - util_blitter_save_depth_stencil_alpha(rctx->blitter, - rctx->draw->state[R600_DSA]); - util_blitter_save_stencil_ref(rctx->blitter, &rctx->stencil_ref); - util_blitter_save_rasterizer(rctx->blitter, - rctx->draw->state[R600_RASTERIZER]); - util_blitter_save_fragment_shader(rctx->blitter, - rctx->ps_shader); - util_blitter_save_vertex_shader(rctx->blitter, - rctx->vs_shader); - util_blitter_save_vertex_elements(rctx->blitter, - rctx->vertex_elements); - util_blitter_save_viewport(rctx->blitter, - &rctx->viewport); + util_blitter_save_blend(rctx->blitter, rctx->blend); + util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->dsa); + if (rctx->stencil_ref) { + util_blitter_save_stencil_ref(rctx->blitter, + &rctx->stencil_ref->state.stencil_ref); + } + util_blitter_save_rasterizer(rctx->blitter, rctx->rasterizer); + util_blitter_save_fragment_shader(rctx->blitter, rctx->ps_shader); + util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader); + util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_elements); + if (rctx->viewport) 
{ + util_blitter_save_viewport(rctx->blitter, &rctx->viewport->state.viewport); + } /* XXX util_blitter_save_clip(rctx->blitter, &rctx->clip); */ util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffer, - rctx->vertex_buffer); + rctx->vertex_buffer); + + /* remove ptr so they don't get deleted */ + rctx->blend = NULL; + rctx->vs_shader = NULL; + rctx->ps_shader = NULL; + rctx->rasterizer = NULL; + rctx->dsa = NULL; + rctx->vertex_elements = NULL; } static void r600_clear(struct pipe_context *ctx, unsigned buffers, const float *rgba, double depth, unsigned stencil) { struct r600_context *rctx = r600_context(ctx); - struct pipe_framebuffer_state *fb = &rctx->fb_state; + struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; r600_blitter_save_states(rctx); util_blitter_clear(rctx->blitter, fb->width, fb->height, @@ -73,12 +79,14 @@ static void r600_clear_render_target(struct pipe_context *pipe, unsigned width, unsigned height) { struct r600_context *rctx = r600_context(pipe); + struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; r600_blitter_save_states(rctx); - util_blitter_save_framebuffer(rctx->blitter, &rctx->fb_state); + util_blitter_save_framebuffer(rctx->blitter, fb); util_blitter_clear_render_target(rctx->blitter, dst, rgba, dstx, dsty, width, height); +R600_ERR("vtx elem %p\n", rctx->vertex_elements); } static void r600_clear_depth_stencil(struct pipe_context *pipe, @@ -90,12 +98,14 @@ static void r600_clear_depth_stencil(struct pipe_context *pipe, unsigned width, unsigned height) { struct r600_context *rctx = r600_context(pipe); + struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; r600_blitter_save_states(rctx); - util_blitter_save_framebuffer(rctx->blitter, &rctx->fb_state); + util_blitter_save_framebuffer(rctx->blitter, fb); util_blitter_clear_depth_stencil(rctx->blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height); +R600_ERR("vtx elem %p\n", 
rctx->vertex_elements); } static void r600_resource_copy_region(struct pipe_context *pipe, diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 167d117520..7829a479c2 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -141,6 +141,7 @@ static void r600_buffer_destroy(struct pipe_screen *screen, if (rbuffer->bo) { radeon_bo_decref(rscreen->rw, rbuffer->bo); } + memset(rbuffer, 0, sizeof(struct r600_resource)); FREE(rbuffer); } diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index f2875f4380..4c7b67ea52 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -220,9 +220,9 @@ static void r600_init_config(struct r600_context *rctx) printf("num_gs_stack_entries : %d\n", num_gs_stack_entries); printf("num_es_stack_entries : %d\n", num_es_stack_entries); - rctx->config = radeon_state(rctx->rw, R600_CONFIG_TYPE, R600_CONFIG); + rctx->hw_states.config = radeon_state(rctx->rw, R600_CONFIG_TYPE, R600_CONFIG); - rctx->config->states[R600_CONFIG__SQ_CONFIG] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] = 0x00000000; switch (family) { case CHIP_RV610: case CHIP_RV620: @@ -231,75 +231,75 @@ static void r600_init_config(struct r600_context *rctx) case CHIP_RV710: break; default: - rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1); break; } - rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_DX9_CONSTS(1); - rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ALU_INST_PREFER_VECTOR(1); - rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio); - rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio); - rctx->config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio); - rctx->config->states[R600_CONFIG__SQ_CONFIG] |= 
S_008C00_ES_PRIO(es_prio); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_DX9_CONSTS(1); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ALU_INST_PREFER_VECTOR(1); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio); - rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0; - rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs); - rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs); - rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0; + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs); + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs); + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0; - rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs); - rctx->config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_es_gprs); + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0; + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs); + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_es_gprs); - rctx->config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0; - 
rctx->config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_PS_THREADS(num_ps_threads); - rctx->config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_VS_THREADS(num_vs_threads); - rctx->config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_GS_THREADS(num_gs_threads); - rctx->config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_ES_THREADS(num_es_threads); + rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0; + rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_PS_THREADS(num_ps_threads); + rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_VS_THREADS(num_vs_threads); + rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_GS_THREADS(num_gs_threads); + rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_ES_THREADS(num_es_threads); - rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0; - rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); - rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); + rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0; + rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); + rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); - rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0; - rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); - rctx->config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); + rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0; + 
rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); + rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); - rctx->config->states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000; - rctx->config->states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002; - rctx->config->states[R600_CONFIG__VC_ENHANCE] = 0x00000000; - rctx->config->states[R600_CONFIG__DB_DEBUG] = 0x00000000; - rctx->config->states[R600_CONFIG__DB_WATERMARKS] = 0x00420204; - rctx->config->states[R600_CONFIG__SX_MISC] = 0x00000000; - rctx->config->states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001; - rctx->config->states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003; - rctx->config->states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_GROUP_DECR] = 
0x00000000; - rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_GS_MODE] = 0x00000000; - rctx->config->states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000; - rctx->config->states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001; - rctx->config->states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000; - rctx->config->states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000; - radeon_state_pm4(rctx->config); + rctx->hw_states.config->states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000; + rctx->hw_states.config->states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002; + rctx->hw_states.config->states[R600_CONFIG__VC_ENHANCE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__DB_DEBUG] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__DB_WATERMARKS] = 0x00420204; + rctx->hw_states.config->states[R600_CONFIG__SX_MISC] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001; + rctx->hw_states.config->states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003; + rctx->hw_states.config->states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000; + 
rctx->hw_states.config->states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GS_MODE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000; + rctx->hw_states.config->states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001; + rctx->hw_states.config->states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000; + radeon_state_pm4(rctx->hw_states.config); } struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) @@ -333,19 +333,19 @@ struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) return NULL; } - rctx->cb_cntl = radeon_state(rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); - rctx->cb_cntl->states[R600_CB_CNTL__CB_SHADER_MASK] = 0x0000000F; - 
rctx->cb_cntl->states[R600_CB_CNTL__CB_TARGET_MASK] = 0x0000000F; - rctx->cb_cntl->states[R600_CB_CNTL__CB_COLOR_CONTROL] = 0x00CC0000; - rctx->cb_cntl->states[R600_CB_CNTL__PA_SC_AA_CONFIG] = 0x00000000; - rctx->cb_cntl->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX] = 0x00000000; - rctx->cb_cntl->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX] = 0x00000000; - rctx->cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_CONTROL] = 0x01000000; - rctx->cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_SRC] = 0x00000000; - rctx->cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_DST] = 0x000000FF; - rctx->cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_MSK] = 0xFFFFFFFF; - rctx->cb_cntl->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF; - radeon_state_pm4(rctx->cb_cntl); + rctx->hw_states.cb_cntl = radeon_state(rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); + rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_SHADER_MASK] = 0x0000000F; + rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_TARGET_MASK] = 0x0000000F; + rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_COLOR_CONTROL] = 0x00CC0000; + rctx->hw_states.cb_cntl->states[R600_CB_CNTL__PA_SC_AA_CONFIG] = 0x00000000; + rctx->hw_states.cb_cntl->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX] = 0x00000000; + rctx->hw_states.cb_cntl->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX] = 0x00000000; + rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_CONTROL] = 0x01000000; + rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_SRC] = 0x00000000; + rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_DST] = 0x000000FF; + rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_MSK] = 0xFFFFFFFF; + rctx->hw_states.cb_cntl->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF; + radeon_state_pm4(rctx->hw_states.cb_cntl); r600_init_config(rctx); diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index 30f33f757e..1f03b202ee 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ 
b/src/gallium/drivers/r600/r600_context.h @@ -34,21 +34,74 @@ #include "r600_shader.h" /* XXX move this to a more appropriate place */ -struct r600_vertex_elements_state -{ - unsigned count; - struct pipe_vertex_element elements[32]; +union pipe_states { + struct pipe_rasterizer_state rasterizer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct pipe_clip_state clip; + struct pipe_shader_state shader; + struct pipe_depth_state depth; + struct pipe_stencil_state stencil; + struct pipe_alpha_state alpha; + struct pipe_depth_stencil_alpha_state dsa; + struct pipe_blend_state blend; + struct pipe_blend_color blend_color; + struct pipe_stencil_ref stencil_ref; + struct pipe_framebuffer_state framebuffer; + struct pipe_sampler_state sampler; + struct pipe_sampler_view sampler_view; + struct pipe_viewport_state viewport; }; -struct r600_pipe_shader { - struct r600_shader shader; - struct radeon_bo *bo; - struct radeon_state *state; +enum pipe_state_type { + pipe_rasterizer_type = 1, + pipe_poly_stipple_type, + pipe_scissor_type, + pipe_clip_type, + pipe_shader_type, + pipe_depth_type, + pipe_stencil_type, + pipe_alpha_type, + pipe_dsa_type, + pipe_blend_type, + pipe_stencil_ref_type, + pipe_framebuffer_type, + pipe_sampler_type, + pipe_sampler_view_type, + pipe_viewport_type, + pipe_type_count }; -struct r600_texture_resource { - struct pipe_sampler_view view; - struct radeon_state *state; +struct r600_context_state { + union pipe_states state; + unsigned refcount; + unsigned type; + struct radeon_state *rstate; + struct r600_shader shader; + struct radeon_bo *bo; +}; + +struct r600_vertex_element +{ + unsigned refcount; + unsigned count; + struct pipe_vertex_element elements[32]; +}; + +struct r600_context_hw_states { + struct radeon_state *rasterizer; + struct radeon_state *scissor; + struct radeon_state *dsa; + struct radeon_state *blend; + struct radeon_state *viewport; + struct radeon_state *cb0; + struct radeon_state *config; + 
struct radeon_state *cb_cntl; + struct radeon_state *db; + unsigned ps_nresource; + unsigned ps_nsampler; + struct radeon_state *ps_resource[160]; + struct radeon_state *ps_sampler[16]; }; struct r600_context { @@ -56,9 +109,11 @@ struct r600_context { struct r600_screen *screen; struct radeon *rw; struct radeon_ctx *ctx; - struct radeon_state *cb_cntl; - struct radeon_state *db; - struct radeon_state *config; + struct blitter_context *blitter; + struct radeon_draw *draw; + /* hw states */ + struct r600_context_hw_states hw_states; +#if 0 struct r600_pipe_shader *ps_shader; struct r600_pipe_shader *vs_shader; unsigned nps_sampler; @@ -71,12 +126,57 @@ struct r600_context { unsigned nvertex_buffer; struct r600_vertex_elements_state *vertex_elements; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - struct blitter_context *blitter; struct pipe_stencil_ref stencil_ref; struct pipe_framebuffer_state fb_state; - struct radeon_draw *draw; struct pipe_viewport_state viewport; +#endif + /* pipe states */ + unsigned flat_shade; + unsigned ps_nsampler; + unsigned vs_nsampler; + unsigned ps_nsampler_view; + unsigned vs_nsampler_view; + unsigned nvertex_buffer; + struct r600_context_state *rasterizer; + struct r600_context_state *poly_stipple; + struct r600_context_state *scissor; + struct r600_context_state *clip; + struct r600_context_state *ps_shader; + struct r600_context_state *vs_shader; + struct r600_context_state *depth; + struct r600_context_state *stencil; + struct r600_context_state *alpha; + struct r600_context_state *dsa; + struct r600_context_state *blend; + struct r600_context_state *stencil_ref; + struct r600_context_state *viewport; + struct r600_context_state *framebuffer; + struct r600_context_state *ps_sampler[PIPE_MAX_ATTRIBS]; + struct r600_context_state *vs_sampler[PIPE_MAX_ATTRIBS]; + struct r600_context_state *ps_sampler_view[PIPE_MAX_ATTRIBS]; + struct r600_context_state *vs_sampler_view[PIPE_MAX_ATTRIBS]; + struct r600_vertex_element 
*vertex_elements; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; +}; + +#if 0 +struct r600_vertex_elements_state +{ + unsigned count; + struct pipe_vertex_element elements[32]; +}; + +struct r600_pipe_shader { + struct r600_shader shader; + struct radeon_bo *bo; + struct radeon_state *state; +}; + +struct r600_texture_resource { + struct pipe_sampler_view view; + struct radeon_state *state; }; +#endif /* Convenience cast wrapper. */ static INLINE struct r600_context *r600_context(struct pipe_context *pipe) @@ -84,6 +184,12 @@ static INLINE struct r600_context *r600_context(struct pipe_context *pipe) return (struct r600_context*)pipe; } +struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigned type, const void *state); +struct r600_context_state *r600_context_state_incref(struct r600_context_state *rstate); +struct r600_context_state *r600_context_state_decref(struct r600_context_state *rstate); + +int r600_context_hw_states(struct r600_context *rctx); + void r600_draw_arrays(struct pipe_context *ctx, unsigned mode, unsigned start, unsigned count); void r600_draw_elements(struct pipe_context *ctx, @@ -101,10 +207,11 @@ void r600_init_state_functions(struct r600_context *rctx); void r600_init_query_functions(struct r600_context* rctx); struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv); -void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *rpshader); -struct r600_pipe_shader *r600_pipe_shader_create(struct pipe_context *ctx, - const struct tgsi_token *tokens); -int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rpshader); +int r600_pipe_shader_create(struct pipe_context *ctx, + struct r600_context_state *rstate, + const struct tgsi_token *tokens); +int r600_pipe_shader_update(struct pipe_context *ctx, + struct r600_context_state *rstate); #define R600_ERR(fmt, args...) 
\ fprintf(stderr, "EE %s/%s:%d - "fmt, __FILE__, __func__, __LINE__, ##args) diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c index 8e9d11b855..b248beaf8c 100644 --- a/src/gallium/drivers/r600/r600_draw.c +++ b/src/gallium/drivers/r600/r600_draw.c @@ -55,8 +55,12 @@ static int r600_draw_common(struct r600_draw *draw) struct r600_resource *rbuffer; unsigned i, j, offset, format, prim; u32 vgt_dma_index_type, vgt_draw_initiator; + struct pipe_vertex_buffer *vertex_buffer; int r; + r = r600_context_hw_states(rctx); + if (r) + return r; switch (draw->index_size) { case 2: vgt_draw_initiator = 0; @@ -84,26 +88,18 @@ static int r600_draw_common(struct r600_draw *draw) r = r600_pipe_shader_update(draw->ctx, rctx->ps_shader); if (r) return r; - r = radeon_draw_set(rctx->draw, rctx->vs_shader->state); - if (r) - return r; - r = radeon_draw_set(rctx->draw, rctx->ps_shader->state); - if (r) - return r; - r = radeon_draw_set(rctx->draw, rctx->cb_cntl); - if (r) - return r; - r = radeon_draw_set(rctx->draw, rctx->db); + r = radeon_draw_set(rctx->draw, rctx->vs_shader->rstate); if (r) return r; - r = radeon_draw_set(rctx->draw, rctx->config); + r = radeon_draw_set(rctx->draw, rctx->ps_shader->rstate); if (r) return r; for (i = 0 ; i < rctx->vertex_elements->count; i++) { j = rctx->vertex_elements->elements[i].vertex_buffer_index; - rbuffer = (struct r600_resource*)rctx->vertex_buffer[j].buffer; - offset = rctx->vertex_elements->elements[i].src_offset + rctx->vertex_buffer[j].buffer_offset; + vertex_buffer = &rctx->vertex_buffer[j]; + rbuffer = (struct r600_resource*)vertex_buffer->buffer; + offset = rctx->vertex_elements->elements[i].src_offset + vertex_buffer->buffer_offset; r = r600_conv_pipe_format(rctx->vertex_elements->elements[i].src_format, &format); if (r) return r; @@ -114,7 +110,7 @@ static int r600_draw_common(struct r600_draw *draw) vs_resource->nbo = 1; vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = offset; 
vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = rbuffer->bo->size - offset; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = S_038008_STRIDE(rctx->vertex_buffer[j].stride) | + vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = S_038008_STRIDE(vertex_buffer->stride) | S_038008_DATA_FORMAT(format); vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = 0x00000000; vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD4] = 0x00000000; @@ -126,17 +122,19 @@ static int r600_draw_common(struct r600_draw *draw) if (r) return r; } +#if 0 /* setup texture sampler & resource */ - for (i = 0 ; i < rctx->nps_sampler; i++) { - r = radeon_draw_set_new(rctx->draw, rctx->ps_sampler[i]); + for (i = 0 ; i < rctx->ps_nsampler; i++) { + r = radeon_draw_set_new(rctx->draw, rctx->ps_sampler[i]->rstate); if (r) return r; } - for (i = 0 ; i < rctx->nps_view; i++) { - r = radeon_draw_set_new(rctx->draw, rctx->ps_view[i]->state); + for (i = 0 ; i < rctx->ps_nsampler_view; i++) { + r = radeon_draw_set_new(rctx->draw, rctx->ps_sampler_view[i]->rstate); if (r) return r; } +#endif /* FIXME start need to change winsys */ draw->draw = radeon_state(rscreen->rw, R600_DRAW_TYPE, R600_DRAW); if (draw->draw == NULL) diff --git a/src/gallium/drivers/r600/r600_resource.c b/src/gallium/drivers/r600/r600_resource.c index 292c5d294d..8dc411ef40 100644 --- a/src/gallium/drivers/r600/r600_resource.c +++ b/src/gallium/drivers/r600/r600_resource.c @@ -24,7 +24,6 @@ #include "r600_context.h" #include "r600_resource.h" #include "r600_screen.h" -#include "r600_texture.h" static struct pipe_resource *r600_resource_create(struct pipe_screen *screen, const struct pipe_resource *templ) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 3909c704e7..8837a7272b 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -98,43 +98,40 @@ static int r600_shader_update(struct pipe_context *ctx, struct 
r600_shader *shad return r600_bc_build(&shader->bc); } -struct r600_pipe_shader *r600_pipe_shader_create(struct pipe_context *ctx, - const struct tgsi_token *tokens) +int r600_pipe_shader_create(struct pipe_context *ctx, + struct r600_context_state *rpshader, + const struct tgsi_token *tokens) { struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_pipe_shader *rpshader = CALLOC_STRUCT(r600_pipe_shader); int r; fprintf(stderr, "--------------------------------------------------------------\n"); tgsi_dump(tokens, 0); if (rpshader == NULL) - return NULL; + return -ENOMEM; rpshader->shader.family = radeon_get_family(rscreen->rw); r = r600_shader_from_tgsi(tokens, &rpshader->shader); if (r) { R600_ERR("translation from TGSI failed !\n"); - goto out_err; + return r; } r = r600_bc_build(&rpshader->shader.bc); if (r) { R600_ERR("building bytecode failed !\n"); - goto out_err; + return r; } fprintf(stderr, "______________________________________________________________\n"); - return rpshader; -out_err: - free(rpshader); - return NULL; + return 0; } -static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *rpshader) +static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader) { struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_shader *rshader = &rpshader->shader; struct radeon_state *state; unsigned i, j, tmp; - rpshader->state = radeon_state_decref(rpshader->state); + rpshader->rstate = radeon_state_decref(rpshader->rstate); state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER); if (state == NULL) return -ENOMEM; @@ -150,22 +147,22 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader } state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr); - rpshader->state = state; - rpshader->state->bo[0] = 
radeon_bo_incref(rscreen->rw, rpshader->bo); - rpshader->state->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo); - rpshader->state->nbo = 2; - rpshader->state->placement[0] = RADEON_GEM_DOMAIN_GTT; + rpshader->rstate = state; + rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); + rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo); + rpshader->rstate->nbo = 2; + rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; return radeon_state_pm4(state); } -static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *rpshader) +static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader) { struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_shader *rshader = &rpshader->shader; struct radeon_state *state; unsigned i, tmp; - rpshader->state = radeon_state_decref(rpshader->state); + rpshader->rstate = radeon_state_decref(rpshader->rstate); state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER); if (state == NULL) return -ENOMEM; @@ -180,14 +177,14 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr); state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = 0x00000002; - rpshader->state = state; - rpshader->state->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); - rpshader->state->nbo = 1; - rpshader->state->placement[0] = RADEON_GEM_DOMAIN_GTT; + rpshader->rstate = state; + rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); + rpshader->rstate->nbo = 1; + rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; return radeon_state_pm4(state); } -static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *rpshader) +static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader) { struct r600_screen 
*rscreen = r600_screen(ctx->screen); struct r600_context *rctx = r600_context(ctx); @@ -221,7 +218,7 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *r return r; } -int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rpshader) +int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader) { struct r600_context *rctx = r600_context(ctx); int r; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 7d67e28c06..0f1e1cd761 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -32,18 +32,557 @@ #include "r600_resource.h" #include "r600d.h" +static void *r600_create_blend_state(struct pipe_context *ctx, + const struct pipe_blend_state *state) +{ + struct r600_context *rctx = r600_context(ctx); + + return r600_context_state(rctx, pipe_blend_type, state); +} + +static void *r600_create_dsa_state(struct pipe_context *ctx, + const struct pipe_depth_stencil_alpha_state *state) +{ + struct r600_context *rctx = r600_context(ctx); + + return r600_context_state(rctx, pipe_dsa_type, state); +} + +static void *r600_create_rs_state(struct pipe_context *ctx, + const struct pipe_rasterizer_state *state) +{ + struct r600_context *rctx = r600_context(ctx); + + return r600_context_state(rctx, pipe_rasterizer_type, state); +} + +static void *r600_create_sampler_state(struct pipe_context *ctx, + const struct pipe_sampler_state *state) +{ + struct r600_context *rctx = r600_context(ctx); + + return r600_context_state(rctx, pipe_sampler_type, state); +} + +static void r600_sampler_view_destroy(struct pipe_context *ctx, + struct pipe_sampler_view *state) +{ + struct r600_context_state *rstate = (struct r600_context_state *)state; + + r600_context_state_decref(rstate); +} + +static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *ctx, + struct pipe_resource *texture, + const struct pipe_sampler_view 
*state) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_context_state *rstate; + + rstate = r600_context_state(rctx, pipe_sampler_type, state); + pipe_reference(NULL, &texture->reference); + rstate->state.sampler_view.texture = texture; + rstate->state.sampler_view.reference.count = 1; + return &rstate->state.sampler_view; +} + +static void *r600_create_shader_state(struct pipe_context *ctx, + const struct pipe_shader_state *state) +{ + struct r600_context *rctx = r600_context(ctx); + + return r600_context_state(rctx, pipe_shader_type, state); +} + +static void *r600_create_vertex_elements(struct pipe_context *ctx, + unsigned count, + const struct pipe_vertex_element *elements) +{ + struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element); + + assert(count < 32); + v->count = count; + memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); + v->refcount = 1; + return v; +} + +static void r600_bind_state(struct pipe_context *ctx, void *state) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_context_state *rstate = (struct r600_context_state *)state; + + if (state == NULL) + return; + switch (rstate->type) { + case pipe_rasterizer_type: + rctx->rasterizer = r600_context_state_decref(rctx->rasterizer); + rctx->rasterizer = r600_context_state_incref(rstate); + break; + case pipe_poly_stipple_type: + rctx->poly_stipple = r600_context_state_decref(rctx->poly_stipple); + rctx->poly_stipple = r600_context_state_incref(rstate); + break; + case pipe_scissor_type: + rctx->scissor = r600_context_state_decref(rctx->scissor); + rctx->scissor = r600_context_state_incref(rstate); + break; + case pipe_clip_type: + rctx->clip = r600_context_state_decref(rctx->clip); + rctx->clip = r600_context_state_incref(rstate); + break; + case pipe_depth_type: + rctx->depth = r600_context_state_decref(rctx->depth); + rctx->depth = r600_context_state_incref(rstate); + break; + case pipe_stencil_type: + rctx->stencil = 
r600_context_state_decref(rctx->stencil); + rctx->stencil = r600_context_state_incref(rstate); + break; + case pipe_alpha_type: + rctx->alpha = r600_context_state_decref(rctx->alpha); + rctx->alpha = r600_context_state_incref(rstate); + break; + case pipe_dsa_type: + rctx->dsa = r600_context_state_decref(rctx->dsa); + rctx->dsa = r600_context_state_incref(rstate); + break; + case pipe_blend_type: + rctx->blend = r600_context_state_decref(rctx->blend); + rctx->blend = r600_context_state_incref(rstate); + break; + case pipe_framebuffer_type: + rctx->framebuffer = r600_context_state_decref(rctx->framebuffer); + rctx->framebuffer = r600_context_state_incref(rstate); + break; + case pipe_stencil_ref_type: + rctx->stencil_ref = r600_context_state_decref(rctx->stencil_ref); + rctx->stencil_ref = r600_context_state_incref(rstate); + break; + case pipe_viewport_type: + rctx->viewport = r600_context_state_decref(rctx->viewport); + rctx->viewport = r600_context_state_incref(rstate); + break; + case pipe_shader_type: + case pipe_sampler_type: + case pipe_sampler_view_type: + default: + R600_ERR("invalid type %d\n", rstate->type); + return; + } +} + +static void r600_bind_ps_shader(struct pipe_context *ctx, void *state) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_context_state *rstate = (struct r600_context_state *)state; + + rctx->ps_shader = r600_context_state_decref(rctx->ps_shader); + rctx->ps_shader = r600_context_state_incref(rstate); +} + +static void r600_bind_vs_shader(struct pipe_context *ctx, void *state) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_context_state *rstate = (struct r600_context_state *)state; + + rctx->vs_shader = r600_context_state_decref(rctx->vs_shader); + rctx->vs_shader = r600_context_state_incref(rstate); +} + +static void r600_delete_vertex_element(struct pipe_context *ctx, void *state) +{ + struct r600_vertex_element *v = (struct r600_vertex_element*)state; + + if (v == NULL) + return; + if 
(--v->refcount) + return; + free(v); +} + +static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_vertex_element *v = (struct r600_vertex_element*)state; + + r600_delete_vertex_element(ctx, rctx->vertex_elements); + rctx->vertex_elements = v; + if (v) { + v->refcount++; + } +} + +static void r600_bind_ps_sampler(struct pipe_context *ctx, + unsigned count, void **states) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_context_state *rstate; + unsigned i; + + for (i = 0; i < rctx->ps_nsampler; i++) { + rctx->ps_sampler[i] = r600_context_state_decref(rctx->ps_sampler[i]); + } + for (i = 0; i < count; i++) { + rstate = (struct r600_context_state *)states[i]; + rctx->ps_sampler[i] = r600_context_state_incref(rstate); + } + rctx->ps_nsampler = count; +} + +static void r600_bind_vs_sampler(struct pipe_context *ctx, + unsigned count, void **states) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_context_state *rstate; + unsigned i; + + for (i = 0; i < rctx->vs_nsampler; i++) { + rctx->vs_sampler[i] = r600_context_state_decref(rctx->vs_sampler[i]); + } + for (i = 0; i < count; i++) { + rstate = (struct r600_context_state *)states[i]; + rctx->vs_sampler[i] = r600_context_state_incref(rstate); + } + rctx->vs_nsampler = count; +} static void r600_delete_state(struct pipe_context *ctx, void *state) { - struct radeon_state *rstate = state; + struct r600_context_state *rstate = (struct r600_context_state *)state; - radeon_state_decref(rstate); + r600_context_state_decref(rstate); } -static void *r600_create_blend_state(struct pipe_context *ctx, - const struct pipe_blend_state *state) +static void r600_set_blend_color(struct pipe_context *ctx, + const struct pipe_blend_color *color) +{ +} + +static void r600_set_clip_state(struct pipe_context *ctx, + const struct pipe_clip_state *state) +{ +} + +static void r600_set_constant_buffer(struct pipe_context *ctx, + 
uint shader, uint index, + struct pipe_resource *buffer) { struct r600_screen *rscreen = r600_screen(ctx->screen); + struct r600_context *rctx = r600_context(ctx); + unsigned nconstant = 0, i, type, id; + struct radeon_state *rstate; + struct pipe_transfer *transfer; + u32 *ptr; + + switch (shader) { + case PIPE_SHADER_VERTEX: + id = R600_VS_CONSTANT; + type = R600_VS_CONSTANT_TYPE; + break; + case PIPE_SHADER_FRAGMENT: + id = R600_PS_CONSTANT; + type = R600_PS_CONSTANT_TYPE; + break; + default: + fprintf(stderr, "%s:%d unsupported %d\n", __func__, __LINE__, shader); + return; + } + if (buffer && buffer->width0 > 0) { + nconstant = buffer->width0 / 16; + ptr = pipe_buffer_map(ctx, buffer, PIPE_TRANSFER_READ, &transfer); + if (ptr == NULL) + return; + for (i = 0; i < nconstant; i++) { + rstate = radeon_state(rscreen->rw, type, id + i); + if (rstate == NULL) + return; + rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT0_0] = ptr[i * 4 + 0]; + rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT1_0] = ptr[i * 4 + 1]; + rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT2_0] = ptr[i * 4 + 2]; + rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT3_0] = ptr[i * 4 + 3]; + if (radeon_state_pm4(rstate)) + return; + if (radeon_draw_set_new(rctx->draw, rstate)) + return; + } + pipe_buffer_unmap(ctx, buffer, transfer); + } +} + +static void r600_set_ps_sampler_view(struct pipe_context *ctx, + unsigned count, + struct pipe_sampler_view **views) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_context_state *rstate; + unsigned i; + + for (i = 0; i < rctx->ps_nsampler_view; i++) { + rctx->ps_sampler_view[i] = r600_context_state_decref(rctx->ps_sampler_view[i]); + } + for (i = 0; i < count; i++) { + rstate = (struct r600_context_state *)views[i]; + rctx->ps_sampler_view[i] = r600_context_state_incref(rstate); + } + rctx->ps_nsampler_view = count; +} + +static void r600_set_vs_sampler_view(struct pipe_context *ctx, + unsigned count, + struct pipe_sampler_view **views) +{ + 
struct r600_context *rctx = r600_context(ctx); + struct r600_context_state *rstate; + unsigned i; + + for (i = 0; i < rctx->vs_nsampler_view; i++) { + rctx->vs_sampler_view[i] = r600_context_state_decref(rctx->vs_sampler_view[i]); + } + for (i = 0; i < count; i++) { + rstate = (struct r600_context_state *)views[i]; + rctx->vs_sampler_view[i] = r600_context_state_incref(rstate); + } + rctx->vs_nsampler_view = count; +} + +static void r600_set_framebuffer_state(struct pipe_context *ctx, + const struct pipe_framebuffer_state *state) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_context_state *rstate; + + rstate = r600_context_state(rctx, pipe_framebuffer_type, state); + r600_bind_state(ctx, rstate); +} + +static void r600_set_polygon_stipple(struct pipe_context *ctx, + const struct pipe_poly_stipple *state) +{ +} + +static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) +{ +} + +static void r600_set_scissor_state(struct pipe_context *ctx, + const struct pipe_scissor_state *state) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_context_state *rstate; + + rstate = r600_context_state(rctx, pipe_scissor_type, state); + r600_bind_state(ctx, rstate); +} + +static void r600_set_stencil_ref(struct pipe_context *ctx, + const struct pipe_stencil_ref *state) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_context_state *rstate; + + rstate = r600_context_state(rctx, pipe_stencil_ref_type, state); + r600_bind_state(ctx, rstate); +} + +static void r600_set_vertex_buffers(struct pipe_context *ctx, + unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct r600_context *rctx = r600_context(ctx); + unsigned i; + + for (i = 0; i < rctx->nvertex_buffer; i++) { + pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); + } + memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); + for (i = 0; i < count; i++) { + rctx->vertex_buffer[i].buffer = NULL; + 
pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); + } + rctx->nvertex_buffer = count; +} + +static void r600_set_viewport_state(struct pipe_context *ctx, + const struct pipe_viewport_state *state) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_context_state *rstate; + + rstate = r600_context_state(rctx, pipe_viewport_type, state); + r600_bind_state(ctx, rstate); +} + +void r600_init_state_functions(struct r600_context *rctx) +{ + rctx->context.create_blend_state = r600_create_blend_state; + rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state; + rctx->context.create_fs_state = r600_create_shader_state; + rctx->context.create_rasterizer_state = r600_create_rs_state; + rctx->context.create_sampler_state = r600_create_sampler_state; + rctx->context.create_sampler_view = r600_create_sampler_view; + rctx->context.create_vertex_elements_state = r600_create_vertex_elements; + rctx->context.create_vs_state = r600_create_shader_state; + rctx->context.bind_blend_state = r600_bind_state; + rctx->context.bind_depth_stencil_alpha_state = r600_bind_state; + rctx->context.bind_fragment_sampler_states = r600_bind_ps_sampler; + rctx->context.bind_fs_state = r600_bind_ps_shader; + rctx->context.bind_rasterizer_state = r600_bind_state; + rctx->context.bind_vertex_elements_state = r600_bind_vertex_elements; + rctx->context.bind_vertex_sampler_states = r600_bind_vs_sampler; + rctx->context.bind_vs_state = r600_bind_vs_shader; + rctx->context.delete_blend_state = r600_delete_state; + rctx->context.delete_depth_stencil_alpha_state = r600_delete_state; + rctx->context.delete_fs_state = r600_delete_state; + rctx->context.delete_rasterizer_state = r600_delete_state; + rctx->context.delete_sampler_state = r600_delete_state; + rctx->context.delete_vertex_elements_state = r600_delete_vertex_element; + rctx->context.delete_vs_state = r600_delete_state; + rctx->context.set_blend_color = r600_set_blend_color; + 
rctx->context.set_clip_state = r600_set_clip_state; + rctx->context.set_constant_buffer = r600_set_constant_buffer; + rctx->context.set_fragment_sampler_views = r600_set_ps_sampler_view; + rctx->context.set_framebuffer_state = r600_set_framebuffer_state; + rctx->context.set_polygon_stipple = r600_set_polygon_stipple; + rctx->context.set_sample_mask = r600_set_sample_mask; + rctx->context.set_scissor_state = r600_set_scissor_state; + rctx->context.set_stencil_ref = r600_set_stencil_ref; + rctx->context.set_vertex_buffers = r600_set_vertex_buffers; + rctx->context.set_vertex_sampler_views = r600_set_vs_sampler_view; + rctx->context.set_viewport_state = r600_set_viewport_state; + rctx->context.sampler_view_destroy = r600_sampler_view_destroy; +} + +struct r600_context_state *r600_context_state_incref(struct r600_context_state *rstate) +{ + if (rstate == NULL) + return NULL; + rstate->refcount++; + return rstate; +} + +struct r600_context_state *r600_context_state_decref(struct r600_context_state *rstate) +{ + unsigned i; + + if (rstate == NULL) + return NULL; + if (--rstate->refcount) + return NULL; + switch (rstate->type) { + case pipe_sampler_view_type: + pipe_resource_reference(&rstate->state.sampler_view.texture, NULL); + break; + case pipe_framebuffer_type: + for (i = 0; i < rstate->state.framebuffer.nr_cbufs; i++) { + pipe_surface_reference(&rstate->state.framebuffer.cbufs[i], NULL); + } + pipe_surface_reference(&rstate->state.framebuffer.zsbuf, NULL); + break; + case pipe_viewport_type: + case pipe_depth_type: + case pipe_rasterizer_type: + case pipe_poly_stipple_type: + case pipe_scissor_type: + case pipe_clip_type: + case pipe_stencil_type: + case pipe_alpha_type: + case pipe_dsa_type: + case pipe_blend_type: + case pipe_stencil_ref_type: + case pipe_shader_type: + case pipe_sampler_type: + break; + default: + R600_ERR("invalid type %d\n", rstate->type); + return NULL; + } + radeon_state_decref(rstate->rstate); + FREE(rstate); + return NULL; +} + +struct 
r600_context_state *r600_context_state(struct r600_context *rctx, unsigned type, const void *state) +{ + struct r600_context_state *rstate = CALLOC_STRUCT(r600_context_state); + const union pipe_states *states = state; + unsigned i; + int r; + + if (rstate == NULL) + return NULL; + rstate->type = type; + rstate->refcount = 1; + + switch (rstate->type) { + case pipe_sampler_view_type: + rstate->state.sampler_view = (*states).sampler_view; + rstate->state.sampler_view.texture = NULL; + break; + case pipe_framebuffer_type: + rstate->state.framebuffer = (*states).framebuffer; + for (i = 0; i < rstate->state.framebuffer.nr_cbufs; i++) { + pipe_surface_reference(&rstate->state.framebuffer.cbufs[i], + (*states).framebuffer.cbufs[i]); + } + pipe_surface_reference(&rstate->state.framebuffer.zsbuf, + (*states).framebuffer.zsbuf); + break; + case pipe_viewport_type: + rstate->state.viewport = (*states).viewport; + break; + case pipe_depth_type: + rstate->state.depth = (*states).depth; + break; + case pipe_rasterizer_type: + rstate->state.rasterizer = (*states).rasterizer; + break; + case pipe_poly_stipple_type: + rstate->state.poly_stipple = (*states).poly_stipple; + break; + case pipe_scissor_type: + rstate->state.scissor = (*states).scissor; + break; + case pipe_clip_type: + rstate->state.clip = (*states).clip; + break; + case pipe_stencil_type: + rstate->state.stencil = (*states).stencil; + break; + case pipe_alpha_type: + rstate->state.alpha = (*states).alpha; + break; + case pipe_dsa_type: + rstate->state.dsa = (*states).dsa; + break; + case pipe_blend_type: + rstate->state.blend = (*states).blend; + break; + case pipe_stencil_ref_type: + rstate->state.stencil_ref = (*states).stencil_ref; + break; + case pipe_shader_type: + rstate->state.shader = (*states).shader; + r = r600_pipe_shader_create(&rctx->context, rstate, rstate->state.shader.tokens); + if (r) { + r600_context_state_decref(rstate); + return NULL; + } + break; + case pipe_sampler_type: + rstate->state.sampler 
= (*states).sampler; + break; + default: + R600_ERR("invalid type %d\n", rstate->type); + FREE(rstate); + return NULL; + } + return rstate; +} + +static struct radeon_state *r600_blend(struct r600_context *rctx) +{ + struct r600_screen *rscreen = rctx->screen; struct radeon_state *rstate; rstate = radeon_state(rscreen->rw, R600_BLEND_TYPE, R600_BLEND); @@ -69,36 +608,19 @@ static void *r600_create_blend_state(struct pipe_context *ctx, return rstate; } -static void r600_bind_blend_state(struct pipe_context *ctx, void *state) +static struct radeon_state *r600_cb0(struct r600_context *rctx) { - struct r600_context *rctx = r600_context(ctx); - radeon_draw_set(rctx->draw, state); -} - -static void r600_set_blend_color(struct pipe_context *ctx, - const struct pipe_blend_color *color) -{ -} - -static void r600_set_clip_state(struct pipe_context *ctx, - const struct pipe_clip_state *state) -{ -} - -static void r600_set_framebuffer_state(struct pipe_context *ctx, - const struct pipe_framebuffer_state *state) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); + struct r600_screen *rscreen = rctx->screen; struct r600_resource_texture *rtex; struct r600_resource *rbuffer; struct radeon_state *rstate; + const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer; unsigned level = state->cbufs[0]->level; unsigned pitch, slice; rstate = radeon_state(rscreen->rw, R600_CB0_TYPE, R600_CB0); if (rstate == NULL) - return; + return NULL; rtex = (struct r600_resource_texture*)state->cbufs[0]->texture; rbuffer = &rtex->resource; rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); @@ -120,70 +642,53 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, rstate->states[R600_CB0__CB_COLOR0_MASK] = 0x00000000; if (radeon_state_pm4(rstate)) { radeon_state_decref(rstate); - return; + return NULL; } - radeon_draw_set_new(rctx->draw, rstate); - rctx->db = radeon_state_decref(rctx->db); - 
if(state->zsbuf) { - rtex = (struct r600_resource_texture*)state->zsbuf->texture; - rbuffer = &rtex->resource; - rctx->db = radeon_state(rscreen->rw, R600_DB_TYPE, R600_DB); - if(rctx->db == NULL) - return; - rctx->db->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); - rctx->db->nbo = 1; - rctx->db->placement[0] = RADEON_GEM_DOMAIN_VRAM; - level = state->zsbuf->level; - pitch = rtex->pitch[level] / 8 - 1; - slice = rtex->pitch[level] * state->zsbuf->height / 64 - 1; - - rctx->db->states[R600_DB__DB_DEPTH_BASE] = 0x00000000; - rctx->db->states[R600_DB__DB_DEPTH_INFO] = 0x00010006; - rctx->db->states[R600_DB__DB_DEPTH_VIEW] = 0x00000000; - rctx->db->states[R600_DB__DB_PREFETCH_LIMIT] = (state->zsbuf->height / 8) -1; - rctx->db->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) | - S_028000_SLICE_TILE_MAX(slice); - } else - rctx->db = NULL; - rctx->fb_state = *state; -} - -static void *r600_create_fs_state(struct pipe_context *ctx, - const struct pipe_shader_state *shader) -{ - return r600_pipe_shader_create(ctx, shader->tokens); -} - -static void r600_bind_fs_state(struct pipe_context *ctx, void *state) -{ - struct r600_context *rctx = r600_context(ctx); - - rctx->ps_shader = state; + return rstate; } -static void *r600_create_vs_state(struct pipe_context *ctx, - const struct pipe_shader_state *shader) +static struct radeon_state *r600_db(struct r600_context *rctx) { - return r600_pipe_shader_create(ctx, shader->tokens); -} + struct r600_screen *rscreen = rctx->screen; + struct r600_resource_texture *rtex; + struct r600_resource *rbuffer; + struct radeon_state *rstate; + const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer; + unsigned level = state->cbufs[0]->level; + unsigned pitch, slice; -static void r600_bind_vs_state(struct pipe_context *ctx, void *state) -{ - struct r600_context *rctx = r600_context(ctx); + if (state->zsbuf == NULL) + return NULL; - rctx->vs_shader = state; -} + rstate = radeon_state(rscreen->rw, 
R600_DB_TYPE, R600_DB); + if (rstate == NULL) + return NULL; -static void r600_set_polygon_stipple(struct pipe_context *ctx, - const struct pipe_poly_stipple *state) -{ + rtex = (struct r600_resource_texture*)state->zsbuf->texture; + rbuffer = &rtex->resource; + rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + rstate->nbo = 1; + rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM; + level = state->zsbuf->level; + pitch = rtex->pitch[level] / 8 - 1; + slice = rtex->pitch[level] * state->zsbuf->height / 64 - 1; + rstate->states[R600_DB__DB_DEPTH_BASE] = 0x00000000; + rstate->states[R600_DB__DB_DEPTH_INFO] = 0x00010006; + rstate->states[R600_DB__DB_DEPTH_VIEW] = 0x00000000; + rstate->states[R600_DB__DB_PREFETCH_LIMIT] = (state->zsbuf->height / 8) -1; + rstate->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) | + S_028000_SLICE_TILE_MAX(slice); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } -static void *r600_create_rs_state(struct pipe_context *ctx, - const struct pipe_rasterizer_state *state) +static struct radeon_state *r600_rasterizer(struct r600_context *rctx) { - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); + const struct pipe_rasterizer_state *state = &rctx->rasterizer->state.rasterizer; + struct r600_screen *rscreen = rctx->screen; struct radeon_state *rstate; rctx->flat_shade = state->flatshade; @@ -220,10 +725,102 @@ R600_ERR("flat shade with texture broke tex coord interp\n"); return rstate; } -static void r600_bind_rs_state(struct pipe_context *ctx, void *state) +static struct radeon_state *r600_scissor(struct r600_context *rctx) { - struct r600_context *rctx = r600_context(ctx); - radeon_draw_set(rctx->draw, state); + const struct pipe_scissor_state *state = &rctx->scissor->state.scissor; + struct r600_screen *rscreen = rctx->screen; + struct radeon_state *rstate; + u32 tl, br; + + tl = S_028240_TL_X(state->minx) | 
S_028240_TL_Y(state->miny) | S_028240_WINDOW_OFFSET_DISABLE(1); + br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); + rstate = radeon_state(rscreen->rw, R600_SCISSOR_TYPE, R600_SCISSOR); + if (rstate == NULL) + return NULL; + rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = tl; + rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = br; + rstate->states[R600_SCISSOR__PA_SC_WINDOW_OFFSET] = 0x00000000; + rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_TL] = tl; + rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_BR] = br; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_RULE] = 0x0000FFFF; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_TL] = tl; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_BR] = br; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_TL] = tl; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_BR] = br; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_TL] = tl; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_BR] = br; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_TL] = tl; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_BR] = br; + rstate->states[R600_SCISSOR__PA_SC_EDGERULE] = 0xAAAAAAAA; + rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_TL] = tl; + rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = br; + rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = tl; + rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = br; + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; +} + +static struct radeon_state *r600_viewport(struct r600_context *rctx) +{ + const struct pipe_viewport_state *state = &rctx->viewport->state.viewport; + struct r600_screen *rscreen = rctx->screen; + struct radeon_state *rstate; + + rstate = radeon_state(rscreen->rw, R600_VIEWPORT_TYPE, R600_VIEWPORT); + if (rstate == NULL) + return NULL; + rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMIN_0] = 0x00000000; + rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; + 
rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui(state->scale[0]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui(state->scale[1]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = fui(state->scale[2]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui(state->translate[0]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui(state->translate[1]); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = fui(state->translate[2]); + rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; +} + +static struct radeon_state *r600_dsa(struct r600_context *rctx) +{ + const struct pipe_depth_stencil_alpha_state *state = &rctx->dsa->state.dsa; + struct r600_screen *rscreen = rctx->screen; + struct radeon_state *rstate; + unsigned db_depth_control; + + rstate = radeon_state(rscreen->rw, R600_DSA_TYPE, R600_DSA); + if (rstate == NULL) + return NULL; + db_depth_control = 0x00700700 | S_028800_Z_ENABLE(state->depth.enabled) | S_028800_Z_WRITE_ENABLE(state->depth.writemask) | S_028800_ZFUNC(state->depth.func); + + rstate->states[R600_DSA__DB_STENCIL_CLEAR] = 0x00000000; + rstate->states[R600_DSA__DB_DEPTH_CLEAR] = 0x3F800000; + rstate->states[R600_DSA__SX_ALPHA_TEST_CONTROL] = 0x00000000; + rstate->states[R600_DSA__DB_STENCILREFMASK] = 0xFFFFFF00; + rstate->states[R600_DSA__DB_STENCILREFMASK_BF] = 0xFFFFFF00; + rstate->states[R600_DSA__SX_ALPHA_REF] = 0x00000000; + rstate->states[R600_DSA__SPI_FOG_FUNC_SCALE] = 0x00000000; + rstate->states[R600_DSA__SPI_FOG_FUNC_BIAS] = 0x00000000; + rstate->states[R600_DSA__SPI_FOG_CNTL] = 0x00000000; + rstate->states[R600_DSA__DB_DEPTH_CONTROL] = db_depth_control; + rstate->states[R600_DSA__DB_SHADER_CONTROL] = 0x00000210; + rstate->states[R600_DSA__DB_RENDER_CONTROL] = 0x00000060; + rstate->states[R600_DSA__DB_RENDER_OVERRIDE] = 0x0000002A; + 
rstate->states[R600_DSA__DB_SRESULTS_COMPARE_STATE1] = 0x00000000; + rstate->states[R600_DSA__DB_PRELOAD_CONTROL] = 0x00000000; + rstate->states[R600_DSA__DB_ALPHA_TO_MASK] = 0x0000AA00; + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } static inline unsigned r600_tex_wrap(unsigned wrap) @@ -296,13 +893,14 @@ static inline unsigned r600_tex_compare(unsigned compare) } } -static void *r600_create_sampler_state(struct pipe_context *ctx, - const struct pipe_sampler_state *state) +static struct radeon_state *r600_sampler(struct r600_context *rctx, + const struct pipe_sampler_state *state, + unsigned id) { - struct r600_screen *rscreen = r600_screen(ctx->screen); + struct r600_screen *rscreen = rctx->screen; struct radeon_state *rstate; - rstate = radeon_state(rscreen->rw, R600_PS_SAMPLER_TYPE, R600_PS_SAMPLER); + rstate = radeon_state(rscreen->rw, R600_PS_SAMPLER_TYPE, id); if (rstate == NULL) return NULL; rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0] = @@ -314,33 +912,16 @@ static void *r600_create_sampler_state(struct pipe_context *ctx, S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)); /* FIXME LOD it depends on texture base level ... 
*/ - rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0] = - S_03C004_MIN_LOD(0) | - S_03C004_MAX_LOD(0) | - S_03C004_LOD_BIAS(0); - rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0] = S_03C008_TYPE(1); - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; -} - -static void r600_bind_sampler_states(struct pipe_context *ctx, - unsigned count, void **states) -{ - struct r600_context *rctx = r600_context(ctx); - unsigned i; - - /* FIXME split VS/PS/GS sampler */ - for (i = 0; i < count; i++) { - rctx->ps_sampler[i] = radeon_state_decref(rctx->ps_sampler[i]); - } - rctx->nps_sampler = count; - for (i = 0; i < count; i++) { - rctx->ps_sampler[i] = radeon_state_incref(states[i]); - rctx->ps_sampler[i]->id = R600_PS_SAMPLER + i; + rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0] = + S_03C004_MIN_LOD(0) | + S_03C004_MAX_LOD(0) | + S_03C004_LOD_BIAS(0); + rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0] = S_03C008_TYPE(1); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; } + return rstate; } static inline unsigned r600_tex_swizzle(unsigned swizzle) @@ -390,57 +971,47 @@ static inline unsigned r600_tex_dim(unsigned dim) } } -static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *ctx, - struct pipe_resource *texture, - const struct pipe_sampler_view *view) +static struct radeon_state *r600_resource(struct r600_context *rctx, + const struct pipe_sampler_view *view, + unsigned id) { - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_texture_resource *rtexture; + struct r600_screen *rscreen = rctx->screen; const struct util_format_description *desc; struct r600_resource_texture *tmp; struct r600_resource *rbuffer; + struct radeon_state *rstate; unsigned format; - if (r600_conv_pipe_format(texture->format, &format)) + if (r600_conv_pipe_format(view->texture->format, &format)) return NULL; - rtexture = 
CALLOC_STRUCT(r600_texture_resource); - if (rtexture == NULL) - return NULL; - desc = util_format_description(texture->format); + desc = util_format_description(view->texture->format); assert(desc == NULL); - rtexture->state = radeon_state(rscreen->rw, R600_PS_RESOURCE_TYPE, R600_PS_RESOURCE); - if (rtexture->state == NULL) { - FREE(rtexture); + rstate = radeon_state(rscreen->rw, R600_PS_RESOURCE_TYPE, id); + if (rstate == NULL) { return NULL; } - rtexture->view = *view; - rtexture->view.reference.count = 1; - rtexture->view.texture = NULL; - pipe_resource_reference(&rtexture->view.texture, texture); - rtexture->view.context = ctx; - - tmp = (struct r600_resource_texture*)texture; + tmp = (struct r600_resource_texture*)view->texture; rbuffer = &tmp->resource; - rtexture->state->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); - rtexture->state->bo[1] = radeon_bo_incref(rscreen->rw, rbuffer->bo); - rtexture->state->nbo = 2; - rtexture->state->placement[0] = RADEON_GEM_DOMAIN_GTT; - rtexture->state->placement[1] = RADEON_GEM_DOMAIN_GTT; - rtexture->state->placement[2] = RADEON_GEM_DOMAIN_GTT; - rtexture->state->placement[3] = RADEON_GEM_DOMAIN_GTT; + rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + rstate->bo[1] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + rstate->nbo = 2; + rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; + rstate->placement[1] = RADEON_GEM_DOMAIN_GTT; + rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; + rstate->placement[3] = RADEON_GEM_DOMAIN_GTT; /* FIXME properly handle first level != 0 */ - rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = - S_038000_DIM(r600_tex_dim(texture->target)) | + rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = + S_038000_DIM(r600_tex_dim(view->texture->target)) | S_038000_PITCH((tmp->pitch[0] / 8) - 1) | - S_038000_TEX_WIDTH(texture->width0 - 1); - rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = - S_038004_TEX_HEIGHT(texture->height0 - 1) | - S_038004_TEX_DEPTH(texture->depth0 - 
1) | + S_038000_TEX_WIDTH(view->texture->width0 - 1); + rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = + S_038004_TEX_HEIGHT(view->texture->height0 - 1) | + S_038004_TEX_DEPTH(view->texture->depth0 - 1) | S_038004_DATA_FORMAT(format); - rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = 0; - rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = tmp->offset[1] >> 8; - rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD4] = + rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = 0; + rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = tmp->offset[1] >> 8; + rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD4] = S_038010_FORMAT_COMP_X(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) | S_038010_FORMAT_COMP_Y(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) | S_038010_FORMAT_COMP_Z(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) | @@ -453,225 +1024,12 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c S_038010_DST_SEL_Z(r600_tex_swizzle(view->swizzle_b)) | S_038010_DST_SEL_W(r600_tex_swizzle(view->swizzle_a)) | S_038010_BASE_LEVEL(view->first_level); - rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD5] = + rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD5] = S_038014_LAST_LEVEL(view->last_level) | S_038014_BASE_ARRAY(0) | S_038014_LAST_ARRAY(0); - rtexture->state->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = + rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE); - return &rtexture->view; -} - -static void r600_sampler_view_destroy(struct pipe_context *ctx, - struct pipe_sampler_view *view) -{ - struct r600_texture_resource *texture; - - if (view == NULL) - return; - texture = LIST_ENTRY(struct r600_texture_resource, view, view); - radeon_state_decref(texture->state); - FREE(texture); -} - -static void r600_set_fragment_sampler_views(struct pipe_context *ctx, - unsigned count, - struct pipe_sampler_view **views) -{ - struct r600_texture_resource *rtexture; - struct 
r600_context *rctx = r600_context(ctx); - struct pipe_sampler_view *tmp; - unsigned i, real_num_views = 0; - - if (views == NULL) - return; - - for (i = 0; i < count; i++) { - if (views[i]) - real_num_views++; - } - - for (i = 0; i < rctx->nps_view; i++) { - tmp = &rctx->ps_view[i]->view; - pipe_sampler_view_reference(&tmp, NULL); - rctx->ps_view[i] = NULL; - } - rctx->nps_view = real_num_views; - for (i = 0; i < count; i++) { - - if (!views[i]) - continue; - rtexture = LIST_ENTRY(struct r600_texture_resource, views[i], view); - rctx->ps_view[i] = rtexture; - tmp = NULL; - pipe_sampler_view_reference(&tmp, views[i]); - rtexture->state->id = R600_PS_RESOURCE + i; - } -} - -static void r600_set_vertex_sampler_views(struct pipe_context *ctx, - unsigned count, - struct pipe_sampler_view **views) -{ - struct r600_texture_resource *rtexture; - struct r600_context *rctx = r600_context(ctx); - struct pipe_sampler_view *tmp; - unsigned i, real_num_views = 0; - - if (views == NULL) - return; - - for (i = 0; i < count; i++) { - if (views[i]) - real_num_views++; - } - for (i = 0; i < rctx->nvs_view; i++) { - tmp = &rctx->vs_view[i]->view; - pipe_sampler_view_reference(&tmp, NULL); - rctx->vs_view[i] = NULL; - } - rctx->nvs_view = real_num_views; - for (i = 0; i < count; i++) { - if (!views[i]) - continue; - rtexture = LIST_ENTRY(struct r600_texture_resource, views[i], view); - rctx->vs_view[i] = rtexture; - tmp = NULL; - pipe_sampler_view_reference(&tmp, views[i]); - rtexture->state->id = R600_VS_RESOURCE + i; - } -} - -static void r600_set_scissor_state(struct pipe_context *ctx, - const struct pipe_scissor_state *state) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - struct radeon_state *rstate; - u32 tl, br; - - tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) | S_028240_WINDOW_OFFSET_DISABLE(1); - br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); - rstate = radeon_state(rscreen->rw, 
R600_SCISSOR_TYPE, R600_SCISSOR); - if (rstate == NULL) - return; - rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_WINDOW_OFFSET] = 0x00000000; - rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_RULE] = 0x0000FFFF; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_EDGERULE] = 0xAAAAAAAA; - rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = br; - rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = tl; - rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = br; - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return; - } - radeon_draw_set_new(rctx->draw, rstate); -} - -static void r600_set_viewport_state(struct pipe_context *ctx, - const struct pipe_viewport_state *state) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - struct radeon_state *rstate; - - rstate = radeon_state(rscreen->rw, R600_VIEWPORT_TYPE, R600_VIEWPORT); - if (rstate == NULL) - return; - rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMIN_0] = 0x00000000; - rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; - rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui(state->scale[0]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui(state->scale[1]); - 
rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = fui(state->scale[2]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui(state->translate[0]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui(state->translate[1]); - rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = fui(state->translate[2]); - rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return; - } - radeon_draw_set_new(rctx->draw, rstate); - rctx->viewport = *state; -} - -static void r600_set_vertex_buffers(struct pipe_context *ctx, - unsigned count, - const struct pipe_vertex_buffer *buffers) -{ - struct r600_context *rctx = r600_context(ctx); - - memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); - rctx->nvertex_buffer = count; -} - - -static void *r600_create_vertex_elements_state(struct pipe_context *ctx, - unsigned count, - const struct pipe_vertex_element *elements) -{ - struct r600_vertex_elements_state *v = CALLOC_STRUCT(r600_vertex_elements_state); - - assert(count < 32); - v->count = count; - memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); - return v; -} - -static void r600_bind_vertex_elements_state(struct pipe_context *ctx, void *state) -{ - struct r600_context *rctx = r600_context(ctx); - struct r600_vertex_elements_state *v = (struct r600_vertex_elements_state*)state; - - rctx->vertex_elements = v; -} - -static void r600_delete_vertex_elements_state(struct pipe_context *ctx, void *state) -{ - FREE(state); -} - -static void *r600_create_dsa_state(struct pipe_context *ctx, - const struct pipe_depth_stencil_alpha_state *state) -{ - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct radeon_state *rstate; - unsigned db_depth_control; - - rstate = radeon_state(rscreen->rw, R600_DSA_TYPE, R600_DSA); - if (rstate == NULL) - return NULL; - db_depth_control = 0x00700700 | S_028800_Z_ENABLE(state->depth.enabled) | 
S_028800_Z_WRITE_ENABLE(state->depth.writemask) | S_028800_ZFUNC(state->depth.func); - - rstate->states[R600_DSA__DB_STENCIL_CLEAR] = 0x00000000; - rstate->states[R600_DSA__DB_DEPTH_CLEAR] = 0x3F800000; - rstate->states[R600_DSA__SX_ALPHA_TEST_CONTROL] = 0x00000000; - rstate->states[R600_DSA__DB_STENCILREFMASK] = 0xFFFFFF00; - rstate->states[R600_DSA__DB_STENCILREFMASK_BF] = 0xFFFFFF00; - rstate->states[R600_DSA__SX_ALPHA_REF] = 0x00000000; - rstate->states[R600_DSA__SPI_FOG_FUNC_SCALE] = 0x00000000; - rstate->states[R600_DSA__SPI_FOG_FUNC_BIAS] = 0x00000000; - rstate->states[R600_DSA__SPI_FOG_CNTL] = 0x00000000; - rstate->states[R600_DSA__DB_DEPTH_CONTROL] = db_depth_control; - rstate->states[R600_DSA__DB_SHADER_CONTROL] = 0x00000210; - rstate->states[R600_DSA__DB_RENDER_CONTROL] = 0x00000060; - rstate->states[R600_DSA__DB_RENDER_OVERRIDE] = 0x0000002A; - rstate->states[R600_DSA__DB_SRESULTS_COMPARE_STATE1] = 0x00000000; - rstate->states[R600_DSA__DB_PRELOAD_CONTROL] = 0x00000000; - rstate->states[R600_DSA__DB_ALPHA_TO_MASK] = 0x0000AA00; if (radeon_state_pm4(rstate)) { radeon_state_decref(rstate); return NULL; @@ -679,105 +1037,100 @@ static void *r600_create_dsa_state(struct pipe_context *ctx, return rstate; } -static void r600_bind_dsa_state(struct pipe_context *ctx, void *state) -{ - struct r600_context *rctx = r600_context(ctx); - radeon_draw_set(rctx->draw, state); -} - -static void r600_set_constant_buffer(struct pipe_context *ctx, - uint shader, uint index, - struct pipe_resource *buffer) +int r600_context_hw_states(struct r600_context *rctx) { - struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_context *rctx = r600_context(ctx); - unsigned nconstant = 0, i, type, id; - struct radeon_state *rstate; - struct pipe_transfer *transfer; - u32 *ptr; - - switch (shader) { - case PIPE_SHADER_VERTEX: - id = R600_VS_CONSTANT; - type = R600_VS_CONSTANT_TYPE; - break; - case PIPE_SHADER_FRAGMENT: - id = R600_PS_CONSTANT; - type = 
R600_PS_CONSTANT_TYPE; - break; - default: - fprintf(stderr, "%s:%d unsupported %d\n", __func__, __LINE__, shader); - return; + unsigned i; + int r; + + /* free previous TODO determine what need to be updated, what + * doesn't + */ + //radeon_state_decref(rctx->hw_states.config); + //radeon_state_decref(rctx->hw_states.cb_cntl); + radeon_state_decref(rctx->hw_states.db); + radeon_state_decref(rctx->hw_states.rasterizer); + radeon_state_decref(rctx->hw_states.scissor); + radeon_state_decref(rctx->hw_states.dsa); + radeon_state_decref(rctx->hw_states.blend); + radeon_state_decref(rctx->hw_states.viewport); + radeon_state_decref(rctx->hw_states.cb0); + for (i = 0; i < rctx->hw_states.ps_nresource; i++) { + radeon_state_decref(rctx->hw_states.ps_resource[i]); + rctx->hw_states.ps_resource[i] = NULL; } - if (buffer && buffer->width0 > 0) { - nconstant = buffer->width0 / 16; - ptr = pipe_buffer_map(ctx, buffer, PIPE_TRANSFER_READ, &transfer); - if (ptr == NULL) - return; - for (i = 0; i < nconstant; i++) { - rstate = radeon_state(rscreen->rw, type, id + i); - if (rstate == NULL) - return; - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT0_0] = ptr[i * 4 + 0]; - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT1_0] = ptr[i * 4 + 1]; - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT2_0] = ptr[i * 4 + 2]; - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT3_0] = ptr[i * 4 + 3]; - if (radeon_state_pm4(rstate)) - return; - if (radeon_draw_set_new(rctx->draw, rstate)) - return; + rctx->hw_states.ps_nresource = 0; + for (i = 0; i < rctx->hw_states.ps_nsampler; i++) { + radeon_state_decref(rctx->hw_states.ps_sampler[i]); + rctx->hw_states.ps_sampler[i] = NULL; + } + rctx->hw_states.ps_nsampler = 0; + + /* build new states */ + rctx->hw_states.rasterizer = r600_rasterizer(rctx); + rctx->hw_states.scissor = r600_scissor(rctx); + rctx->hw_states.dsa = r600_dsa(rctx); + rctx->hw_states.blend = r600_blend(rctx); + rctx->hw_states.viewport = r600_viewport(rctx); + rctx->hw_states.cb0 
= r600_cb0(rctx); + rctx->hw_states.db = r600_db(rctx); + for (i = 0; i < rctx->ps_nsampler; i++) { + if (rctx->ps_sampler[i]) { + rctx->hw_states.ps_sampler[i] = r600_sampler(rctx, + &rctx->ps_sampler[i]->state.sampler, + R600_PS_SAMPLER + i); } - pipe_buffer_unmap(ctx, buffer, transfer); } -} - -static void r600_set_stencil_ref(struct pipe_context *ctx, - const struct pipe_stencil_ref *sr) -{ - struct r600_context *rctx = r600_context(ctx); - rctx->stencil_ref = *sr; -} - -static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) -{ -} - -void r600_init_state_functions(struct r600_context *rctx) -{ - rctx->context.set_sample_mask = r600_set_sample_mask; - rctx->context.create_blend_state = r600_create_blend_state; - rctx->context.bind_blend_state = r600_bind_blend_state; - rctx->context.delete_blend_state = r600_delete_state; - rctx->context.set_blend_color = r600_set_blend_color; - rctx->context.set_clip_state = r600_set_clip_state; - rctx->context.set_constant_buffer = r600_set_constant_buffer; - rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state; - rctx->context.bind_depth_stencil_alpha_state = r600_bind_dsa_state; - rctx->context.delete_depth_stencil_alpha_state = r600_delete_state; - rctx->context.set_framebuffer_state = r600_set_framebuffer_state; - rctx->context.create_fs_state = r600_create_fs_state; - rctx->context.bind_fs_state = r600_bind_fs_state; - rctx->context.delete_fs_state = r600_delete_state; - rctx->context.set_polygon_stipple = r600_set_polygon_stipple; - rctx->context.create_rasterizer_state = r600_create_rs_state; - rctx->context.bind_rasterizer_state = r600_bind_rs_state; - rctx->context.delete_rasterizer_state = r600_delete_state; - rctx->context.create_sampler_state = r600_create_sampler_state; - rctx->context.bind_fragment_sampler_states = r600_bind_sampler_states; - rctx->context.bind_vertex_sampler_states = r600_bind_sampler_states; - rctx->context.delete_sampler_state = r600_delete_state; - 
rctx->context.create_sampler_view = r600_create_sampler_view; - rctx->context.sampler_view_destroy = r600_sampler_view_destroy; - rctx->context.set_fragment_sampler_views = r600_set_fragment_sampler_views; - rctx->context.set_vertex_sampler_views = r600_set_vertex_sampler_views; - rctx->context.set_scissor_state = r600_set_scissor_state; - rctx->context.set_viewport_state = r600_set_viewport_state; - rctx->context.set_vertex_buffers = r600_set_vertex_buffers; - rctx->context.create_vertex_elements_state = r600_create_vertex_elements_state; - rctx->context.bind_vertex_elements_state = r600_bind_vertex_elements_state; - rctx->context.delete_vertex_elements_state = r600_delete_vertex_elements_state; - rctx->context.create_vs_state = r600_create_vs_state; - rctx->context.bind_vs_state = r600_bind_vs_state; - rctx->context.delete_vs_state = r600_delete_state; - rctx->context.set_stencil_ref = r600_set_stencil_ref; + rctx->hw_states.ps_nsampler = rctx->ps_nsampler; + for (i = 0; i < rctx->ps_nsampler_view; i++) { + if (rctx->ps_sampler_view[i]) { + rctx->hw_states.ps_resource[i] = r600_resource(rctx, + &rctx->ps_sampler_view[i]->state.sampler_view, + R600_PS_RESOURCE + i); + } + } + rctx->hw_states.ps_nresource = rctx->ps_nsampler_view; + + /* bind states */ + r = radeon_draw_set(rctx->draw, rctx->hw_states.db); + if (r) + return r; + r = radeon_draw_set(rctx->draw, rctx->hw_states.rasterizer); + if (r) + return r; + r = radeon_draw_set(rctx->draw, rctx->hw_states.scissor); + if (r) + return r; + r = radeon_draw_set(rctx->draw, rctx->hw_states.dsa); + if (r) + return r; + r = radeon_draw_set(rctx->draw, rctx->hw_states.blend); + if (r) + return r; + r = radeon_draw_set(rctx->draw, rctx->hw_states.viewport); + if (r) + return r; + r = radeon_draw_set(rctx->draw, rctx->hw_states.cb0); + if (r) + return r; + r = radeon_draw_set(rctx->draw, rctx->hw_states.config); + if (r) + return r; + r = radeon_draw_set(rctx->draw, rctx->hw_states.cb_cntl); + if (r) + return r; + for (i 
= 0; i < rctx->hw_states.ps_nresource; i++) { + if (rctx->hw_states.ps_resource[i]) { + r = radeon_draw_set(rctx->draw, rctx->hw_states.ps_resource[i]); + if (r) + return r; + } + } + for (i = 0; i < rctx->hw_states.ps_nsampler; i++) { + if (rctx->hw_states.ps_sampler[i]) { + r = radeon_draw_set(rctx->draw, rctx->hw_states.ps_sampler[i]); + if (r) + return r; + } + } + return 0; } diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index 3d87a994c1..9520792f54 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -30,7 +30,7 @@ #include "util/u_debug.h" #include "radeon_priv.h" #include "r600_screen.h" -#include "r600_texture.h" +#include "r600_resource.h" #include "r600_public.h" #include "r600_drm_public.h" #include "state_tracker/drm_driver.h" @@ -45,7 +45,7 @@ boolean r600_buffer_get_handle(struct radeon *rw, struct winsys_handle *whandle) { struct drm_gem_flink flink; - struct r600_buffer* rbuffer = (struct r600_buffer*)buf; + struct r600_resource* rbuffer = (struct r600_buffer*)buf; if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { if (!rbuffer->flink) { -- cgit v1.2.3 From 35e044ab562b65aa53f9d9d7b5885e6a887774bb Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 28 Jul 2010 19:59:38 -0400 Subject: r600g: switch btw flat/linear interpolation I am not sure how to properly handle flat shading regarding non color parameter to fragment shader. It seems we should still interpolate non color using linear interpolation and flat shade only apply to color. 
Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_shader.c | 7 ++++++- src/gallium/drivers/r600/r600_shader.h | 1 + src/gallium/drivers/r600/r600_state.c | 2 -- 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 8837a7272b..3f1979b9cc 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -169,7 +169,10 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta for (i = 0; i < rshader->ninput; i++) { tmp = S_028644_SEMANTIC(rshader->input[i].sid); tmp |= S_028644_SEL_CENTROID(1); - tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); + if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || + rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) { + tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); + } state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp; } state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) | @@ -287,6 +290,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) i = ctx->shader->ninput++; ctx->shader->input[i].name = d->Semantic.Name; ctx->shader->input[i].sid = d->Semantic.Index; + ctx->shader->input[i].interpolate = d->Declaration.Interpolate; ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; if (ctx->type == TGSI_PROCESSOR_VERTEX) { /* turn input into fetch */ @@ -313,6 +317,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->output[i].name = d->Semantic.Name; ctx->shader->output[i].sid = d->Semantic.Index; ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; + ctx->shader->output[i].interpolate = d->Declaration.Interpolate; break; case TGSI_FILE_CONSTANT: case TGSI_FILE_TEMPORARY: diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 23b6a83b9a..ee0381e8bd 100644 --- 
a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -29,6 +29,7 @@ struct r600_shader_io { unsigned name; unsigned gpr; int sid; + unsigned interpolate; }; struct r600_shader { diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 0f1e1cd761..5b98dbe236 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -692,8 +692,6 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) struct radeon_state *rstate; rctx->flat_shade = state->flatshade; - rctx->flat_shade = 0; -R600_ERR("flat shade with texture broke tex coord interp\n"); rstate = radeon_state(rscreen->rw, R600_RASTERIZER_TYPE, R600_RASTERIZER); if (rstate == NULL) return NULL; -- cgit v1.2.3 From 42c1f27149828e5b5143f5e53ca3bd7c04a4e762 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 28 Jul 2010 20:09:15 -0400 Subject: r600g: state context ptr in sampler_view & add I8/L8 buffer format Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_helper.c | 2 ++ src/gallium/drivers/r600/r600_state.c | 1 + 2 files changed, 3 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_helper.c b/src/gallium/drivers/r600/r600_helper.c index 7241ab1c17..132abf90a3 100644 --- a/src/gallium/drivers/r600/r600_helper.c +++ b/src/gallium/drivers/r600/r600_helper.c @@ -59,6 +59,8 @@ int r600_conv_pipe_format(unsigned pformat, unsigned *format) case PIPE_FORMAT_L8_UNORM: case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: + *format = V_0280A0_COLOR_8; + return 0; case PIPE_FORMAT_L16_UNORM: case PIPE_FORMAT_Z16_UNORM: case PIPE_FORMAT_Z32_UNORM: diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 5b98dbe236..ff5df855c6 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -83,6 +83,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct 
pipe_context *c pipe_reference(NULL, &texture->reference); rstate->state.sampler_view.texture = texture; rstate->state.sampler_view.reference.count = 1; + rstate->state.sampler_view.context = ctx; return &rstate->state.sampler_view; } -- cgit v1.2.3 From aa44bd9189848b91619335207b8ec3be5679f982 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Wed, 28 Jul 2010 22:18:14 -0400 Subject: Untangle gallium/egl/glx source sharing mess and make it compile again --- src/gallium/state_trackers/egl/x11/glxinit.c | 124 +++++++----------------- src/gallium/state_trackers/egl/x11/glxinit.h | 19 +++- src/gallium/state_trackers/egl/x11/x11_screen.c | 29 ++---- 3 files changed, 58 insertions(+), 114 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/state_trackers/egl/x11/glxinit.c b/src/gallium/state_trackers/egl/x11/glxinit.c index 809a0987e5..57c6aaff86 100644 --- a/src/gallium/state_trackers/egl/x11/glxinit.c +++ b/src/gallium/state_trackers/egl/x11/glxinit.c @@ -10,10 +10,16 @@ #include #include #include +#include #include #include #include +#include "GL/glxproto.h" +#include "GL/glxtokens.h" +#include "GL/gl.h" /* for GL types needed by __GLcontextModes */ +#include "GL/internal/glcore.h" /* for __GLcontextModes */ + #include "glxinit.h" #ifdef GLX_DIRECT_RENDERING @@ -55,9 +61,9 @@ static /* const */ XExtensionHooks __glXExtensionHooks = { NULL, /* error_string */ }; -XEXT_GENERATE_FIND_DISPLAY(__glXFindDisplay, __glXExtensionInfo, - __glXExtensionName, &__glXExtensionHooks, - __GLX_NUMBER_EVENTS, NULL) +static XEXT_GENERATE_FIND_DISPLAY(__glXFindDisplay, __glXExtensionInfo, + __glXExtensionName, &__glXExtensionHooks, + __GLX_NUMBER_EVENTS, NULL) static GLint _gl_convert_from_x_visual_type(int visualType) @@ -73,6 +79,17 @@ _gl_convert_from_x_visual_type(int visualType) ? 
glx_visual_types[visualType] : GLX_NONE; } +static void +_gl_context_modes_destroy(__GLcontextModes * modes) +{ + while (modes != NULL) { + __GLcontextModes *const next = modes->next; + + Xfree(modes); + modes = next; + } +} + static __GLcontextModes * _gl_context_modes_create(unsigned count, size_t minimum_size) { @@ -116,18 +133,7 @@ _gl_context_modes_create(unsigned count, size_t minimum_size) return base; } -_X_HIDDEN void -_gl_context_modes_destroy(__GLcontextModes * modes) -{ - while (modes != NULL) { - __GLcontextModes *const next = modes->next; - - Xfree(modes); - modes = next; - } -} - -_X_HIDDEN char * +static char * __glXQueryServerString(Display * dpy, int opcode, CARD32 screen, CARD32 name) { xGLXGenericGetStringReq *req; @@ -194,10 +200,6 @@ FreeScreenConfigs(__GLXdisplayPrivate * priv) _gl_context_modes_destroy(psc->configs); psc->configs = NULL; /* NOTE: just for paranoia */ } - if (psc->visuals) { - _gl_context_modes_destroy(psc->visuals); - psc->visuals = NULL; /* NOTE: just for paranoia */ - } Xfree((char *) psc->serverGLXexts); } XFree((char *) priv->screenConfigs); @@ -215,14 +217,8 @@ __glXFreeDisplayPrivate(XExtData * extension) priv = (__GLXdisplayPrivate *) extension->private_data; FreeScreenConfigs(priv); - if (priv->serverGLXvendor) { - Xfree((char *) priv->serverGLXvendor); - priv->serverGLXvendor = 0x0; /* to protect against double free's */ - } - if (priv->serverGLXversion) { + if (priv->serverGLXversion) Xfree((char *) priv->serverGLXversion); - priv->serverGLXversion = 0x0; /* to protect against double free's */ - } Xfree((char *) priv); return 0; @@ -234,6 +230,10 @@ __glXFreeDisplayPrivate(XExtData * extension) ** Query the version of the GLX extension. This procedure works even if ** the client extension is not completely set up. 
*/ + +#define GLX_MAJOR_VERSION 1 /* current version numbers */ +#define GLX_MINOR_VERSION 4 + static Bool QueryVersion(Display * dpy, int opcode, int *major, int *minor) { @@ -263,7 +263,13 @@ QueryVersion(Display * dpy, int opcode, int *major, int *minor) return GL_TRUE; } -_X_HIDDEN void +#define __GLX_MIN_CONFIG_PROPS 18 +#define __GLX_MAX_CONFIG_PROPS 500 +#define __GLX_EXT_CONFIG_PROPS 10 +#define __GLX_TOTAL_CONFIG (__GLX_MIN_CONFIG_PROPS + \ + 2 * __GLX_EXT_CONFIG_PROPS) + +static void __glXInitializeVisualConfigFromTags(__GLcontextModes * config, int count, const INT32 * bp, Bool tagged_only, Bool fbconfig_style_tags) @@ -505,35 +511,6 @@ createConfigsFromProperties(Display * dpy, int nvisuals, int nprops, return modes; } -static GLboolean -getVisualConfigs(__GLXscreenConfigs *psc, - __GLXdisplayPrivate *priv, int screen) -{ - xGLXGetVisualConfigsReq *req; - xGLXGetVisualConfigsReply reply; - Display *dpy = priv->dpy; - - LockDisplay(dpy); - - psc->visuals = NULL; - GetReq(GLXGetVisualConfigs, req); - req->reqType = priv->majorOpcode; - req->glxCode = X_GLXGetVisualConfigs; - req->screen = screen; - - if (!_XReply(dpy, (xReply *) & reply, 0, False)) - goto out; - - psc->visuals = createConfigsFromProperties(dpy, - reply.numVisuals, - reply.numProps, - screen, GL_FALSE); - - out: - UnlockDisplay(dpy); - return psc->visuals != NULL; -} - static GLboolean getFBConfigs(__GLXscreenConfigs *psc, __GLXdisplayPrivate *priv, int screen) { @@ -581,32 +558,6 @@ getFBConfigs(__GLXscreenConfigs *psc, __GLXdisplayPrivate *priv, int screen) return psc->configs != NULL; } -_X_HIDDEN Bool -glx_screen_init(__GLXscreenConfigs *psc, - int screen, __GLXdisplayPrivate * priv) -{ - /* Initialize per screen dynamic client GLX extensions */ - psc->ext_list_first_time = GL_TRUE; - psc->scr = screen; - psc->dpy = priv->dpy; - - getVisualConfigs(psc, priv, screen); - getFBConfigs(psc, priv, screen); - - return GL_TRUE; -} - -static __GLXscreenConfigs * -createIndirectScreen() -{ - 
__GLXscreenConfigs *psc; - - psc = Xmalloc(sizeof *psc); - memset(psc, 0, sizeof *psc); - - return psc; -} - static GLboolean AllocAndFetchScreenConfigs(Display * dpy, __GLXdisplayPrivate * priv) { @@ -630,10 +581,10 @@ AllocAndFetchScreenConfigs(Display * dpy, __GLXdisplayPrivate * priv) } for (i = 0; i < screens; i++) { - psc = createIndirectScreen(); + psc = Xcalloc(1, sizeof *psc); if (!psc) return GL_FALSE; - glx_screen_init(psc, i, priv); + getFBConfigs(psc, priv, i); priv->screenConfigs[i] = psc; } @@ -682,13 +633,8 @@ __glXInitialize(Display * dpy) ** structures from the server. */ dpyPriv->majorOpcode = info->codes->major_opcode; - dpyPriv->majorVersion = major; - dpyPriv->minorVersion = minor; dpyPriv->dpy = dpy; - dpyPriv->serverGLXvendor = NULL; - dpyPriv->serverGLXversion = NULL; - if (!AllocAndFetchScreenConfigs(dpy, dpyPriv)) { Xfree(dpyPriv); Xfree(private); diff --git a/src/gallium/state_trackers/egl/x11/glxinit.h b/src/gallium/state_trackers/egl/x11/glxinit.h index 1cc7c460fe..4078aef2fe 100644 --- a/src/gallium/state_trackers/egl/x11/glxinit.h +++ b/src/gallium/state_trackers/egl/x11/glxinit.h @@ -2,10 +2,21 @@ #define GLXINIT_INCLUDED #include -#include "glxclient.h" +#include -/* this is used by DRI loaders */ -extern void -_gl_context_modes_destroy(__GLcontextModes * modes); +typedef struct { + __GLcontextModes *configs; + char *serverGLXexts; +} __GLXscreenConfigs; + +typedef struct { + Display *dpy; + __GLXscreenConfigs **screenConfigs; + char *serverGLXversion; + int majorOpcode; + struct x11_screen *xscr; +} __GLXdisplayPrivate; + +extern __GLXdisplayPrivate *__glXInitialize(Display * dpy); #endif /* GLXINIT_INCLUDED */ diff --git a/src/gallium/state_trackers/egl/x11/x11_screen.c b/src/gallium/state_trackers/egl/x11/x11_screen.c index bc6482ab15..c07ebb7ef6 100644 --- a/src/gallium/state_trackers/egl/x11/x11_screen.c +++ b/src/gallium/state_trackers/egl/x11/x11_screen.c @@ -39,11 +39,6 @@ #include "glxinit.h" struct x11_screen { -#ifdef 
GLX_DIRECT_RENDERING - /* dummy base class */ - struct __GLXDRIdisplayRec base; -#endif - Display *dpy; int number; @@ -108,7 +103,7 @@ x11_screen_destroy(struct x11_screen *xscr) #ifdef GLX_DIRECT_RENDERING /* xscr->glx_dpy will be destroyed with the X display */ if (xscr->glx_dpy) - xscr->glx_dpy->dri2Display = NULL; + xscr->glx_dpy->xscr = NULL; #endif if (xscr->visuals) @@ -230,17 +225,6 @@ x11_screen_get_glx_configs(struct x11_screen *xscr) : NULL; } -/** - * Return the GLX visuals. - */ -const __GLcontextModes * -x11_screen_get_glx_visuals(struct x11_screen *xscr) -{ - return (x11_screen_init_glx(xscr)) - ? xscr->glx_dpy->screenConfigs[xscr->number]->visuals - : NULL; -} - /** * Probe the screen for the DRI2 driver name. */ @@ -306,14 +290,14 @@ x11_screen_enable_dri2(struct x11_screen *xscr, close(fd); return -1; } - if (xscr->glx_dpy->dri2Display) { + if (xscr->glx_dpy->xscr) { _eglLog(_EGL_WARNING, "display is already managed by another x11 screen"); close(fd); return -1; } - xscr->glx_dpy->dri2Display = (__GLXDRIdisplay *) xscr; + xscr->glx_dpy->xscr = xscr; xscr->dri_invalidate_buffers = invalidate_buffers; xscr->dri_user_data = user_data; @@ -428,6 +412,9 @@ x11_context_modes_count(const __GLcontextModes *modes) return count; } +extern void +dri2InvalidateBuffers(Display *dpy, XID drawable); + /** * This is called from src/glx/dri2.c. */ @@ -437,8 +424,8 @@ dri2InvalidateBuffers(Display *dpy, XID drawable) __GLXdisplayPrivate *priv = __glXInitialize(dpy); struct x11_screen *xscr = NULL; - if (priv && priv->dri2Display) - xscr = (struct x11_screen *) priv->dri2Display; + if (priv && priv->xscr) + xscr = priv->xscr; if (!xscr || !xscr->dri_invalidate_buffers) return; -- cgit v1.2.3 From c5e9d3114a80d6d35a2f4e65783cdc75fcc2deac Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 16 Jul 2010 04:02:55 +0800 Subject: gallium: Add pipe_context::draw_vbo and pipe_context::set_index_buffer. This commit adds a new unified draw_vbo method to pipe_context. 
Unlike other draw methods, draw_vbo treats the index buffer as a state which is set with set_index_buffer. --- src/gallium/include/pipe/p_context.h | 6 ++++++ src/gallium/include/pipe/p_state.h | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 7ec3d63a3f..3314b1e0e0 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -102,6 +102,9 @@ struct pipe_context { unsigned start, unsigned count); + void (*draw_vbo)( struct pipe_context *pipe, + const struct pipe_draw_info *info ); + /** * Draw the stream output buffer at index 0 */ @@ -249,6 +252,9 @@ struct pipe_context { unsigned num_buffers, const struct pipe_vertex_buffer * ); + void (*set_index_buffer)( struct pipe_context *pipe, + const struct pipe_index_buffer * ); + void (*set_stream_output_buffers)(struct pipe_context *, struct pipe_resource **buffers, int *offsets, /*array of offsets diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 301fe2b74f..0f1a44cde4 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -426,6 +426,41 @@ struct pipe_vertex_element }; +/** + * An index buffer. When an index buffer is bound, all indices to vertices + * will be looked up in the buffer. + */ +struct pipe_index_buffer +{ + unsigned index_size; /**< size of an index, in bytes */ + unsigned offset; /**< offset to start of data in buffer, in bytes */ + struct pipe_resource *buffer; /**< the actual buffer */ +}; + + +/** + * Information to describe a draw_vbo call. 
+ */ +struct pipe_draw_info +{ + boolean indexed; /**< use index buffer */ + + unsigned mode; /**< the mode of the primitive */ + unsigned start; /**< the index of the first vertex */ + unsigned count; /**< number of vertices */ + + unsigned start_instance; /**< first instance id */ + unsigned instance_count; /**< number of instances */ + + /** + * For indexed drawing, these fields apply after index lookup. + */ + int index_bias; /**< a bias to be added to each index */ + unsigned min_index; /**< the min index */ + unsigned max_index; /**< the max index */ +}; + + #ifdef __cplusplus } #endif -- cgit v1.2.3 From 6d28bf917fb1d741d90fd3f05c22769376021fca Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 16 Jul 2010 04:35:58 +0800 Subject: gallium: Implement draw_vbo and set_index_buffer for all drivers. Some drivers define a generic function that is called by all drawing functions. To implement draw_vbo for such drivers, either draw_vbo calls the generic function or the prototype of the generic function is changed to match draw_vbo. Other drivers have no such generic function. draw_vbo is implemented by calling either draw_arrays or draw_elements. For most drivers, set_index_buffer does not mark the state dirty for tracking. Instead, the index buffer state is emitted whenever draw_vbo is called, just like the case with draw_elements. It surely can be improved.
--- src/gallium/auxiliary/util/u_draw_quad.h | 30 +++++ src/gallium/drivers/cell/ppu/cell_context.h | 1 + src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 69 ++++++++--- src/gallium/drivers/cell/ppu/cell_state_vertex.c | 16 +++ src/gallium/drivers/failover/fo_context.c | 61 ++++++---- src/gallium/drivers/failover/fo_context.h | 2 + src/gallium/drivers/failover/fo_state.c | 18 +++ src/gallium/drivers/failover/fo_state_emit.c | 5 + src/gallium/drivers/galahad/glhd_context.c | 47 +++++++ src/gallium/drivers/i915/i915_context.c | 70 ++++++++--- src/gallium/drivers/i915/i915_context.h | 1 + src/gallium/drivers/i915/i915_state.c | 14 +++ src/gallium/drivers/i965/brw_context.h | 1 + src/gallium/drivers/i965/brw_draw.c | 87 ++++++++----- src/gallium/drivers/i965/brw_draw_upload.c | 7 +- src/gallium/drivers/i965/brw_pipe_vertex.c | 31 +++++ src/gallium/drivers/identity/id_context.c | 30 +++++ src/gallium/drivers/llvmpipe/lp_context.h | 1 + src/gallium/drivers/llvmpipe/lp_draw_arrays.c | 90 ++++++++++---- src/gallium/drivers/llvmpipe/lp_state_vertex.c | 14 +++ src/gallium/drivers/nv50/nv50_context.c | 1 + src/gallium/drivers/nv50/nv50_context.h | 3 + src/gallium/drivers/nv50/nv50_state.c | 15 +++ src/gallium/drivers/nv50/nv50_vbo.c | 31 +++++ src/gallium/drivers/nvfx/nvfx_context.c | 1 + src/gallium/drivers/nvfx/nvfx_context.h | 5 +- src/gallium/drivers/nvfx/nvfx_state.c | 15 +++ src/gallium/drivers/nvfx/nvfx_vbo.c | 39 +++++- src/gallium/drivers/r300/r300_context.h | 2 + src/gallium/drivers/r300/r300_render.c | 142 ++++++++++++++-------- src/gallium/drivers/r300/r300_render_stencilref.c | 22 ++++ src/gallium/drivers/r300/r300_state.c | 18 +++ src/gallium/drivers/r600/r600_context.c | 1 + src/gallium/drivers/r600/r600_context.h | 3 + src/gallium/drivers/r600/r600_draw.c | 27 ++++ src/gallium/drivers/r600/r600_state.c | 18 +++ src/gallium/drivers/rbug/rbug_context.c | 34 ++++++ src/gallium/drivers/softpipe/sp_context.c | 2 + src/gallium/drivers/softpipe/sp_context.h | 1 + 
src/gallium/drivers/softpipe/sp_draw_arrays.c | 92 +++++++++----- src/gallium/drivers/softpipe/sp_state.h | 7 ++ src/gallium/drivers/softpipe/sp_state_vertex.c | 14 +++ src/gallium/drivers/svga/svga_context.h | 1 + src/gallium/drivers/svga/svga_pipe_draw.c | 24 ++++ src/gallium/drivers/svga/svga_pipe_vertex.c | 19 +++ src/gallium/drivers/trace/tr_context.c | 52 ++++++++ src/gallium/drivers/trace/tr_dump_state.c | 20 +++ src/gallium/drivers/trace/tr_dump_state.h | 2 + 48 files changed, 1004 insertions(+), 202 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_draw_quad.h b/src/gallium/auxiliary/util/u_draw_quad.h index 42eb184428..1c9f752611 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.h +++ b/src/gallium/auxiliary/util/u_draw_quad.h @@ -29,12 +29,42 @@ #define U_DRAWQUAD_H +#include "pipe/p_compiler.h" +#include "pipe/p_context.h" + + #ifdef __cplusplus extern "C" { #endif struct pipe_resource; + +static INLINE void +util_draw_init_info(struct pipe_draw_info *info) +{ + memset(info, 0, sizeof(*info)); + info->instance_count = 1; + info->max_index = 0xffffffff; +} + + +static INLINE void +util_draw_arrays(struct pipe_context *pipe, uint mode, uint start, uint count) +{ + struct pipe_draw_info info; + + util_draw_init_info(&info); + info.mode = mode; + info.start = start; + info.count = count; + info.min_index = start; + info.max_index = start + count - 1; + + pipe->draw_vbo(pipe, &info); +} + + extern void util_draw_vertex_buffer(struct pipe_context *pipe, struct pipe_resource *vbuf, uint offset, diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index dc46e59a2d..d1aee62ba1 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -132,6 +132,7 @@ struct cell_context struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; uint num_vertex_buffers; + struct pipe_index_buffer index_buffer; 
ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS]; ubyte *zsbuf_map; diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 6a1e4d8a64..e06226fbfe 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -34,6 +34,7 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" #include "util/u_inlines.h" +#include "util/u_draw_quad.h" #include "cell_context.h" #include "cell_draw_arrays.h" @@ -56,16 +57,11 @@ * XXX should the element buffer be specified/bound with a separate function? */ static void -cell_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned min_index, - unsigned max_index, - unsigned mode, unsigned start, unsigned count) +cell_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { struct cell_context *cell = cell_context(pipe); struct draw_context *draw = cell->draw; + void *mapped_indices = NULL; unsigned i; if (cell->dirty) @@ -83,18 +79,20 @@ cell_draw_range_elements(struct pipe_context *pipe, draw_set_mapped_vertex_buffer(draw, i, buf); } /* Map index buffer, if present */ - if (indexBuffer) { - void *mapped_indexes = cell_resource(indexBuffer)->data; - draw_set_mapped_element_buffer(draw, indexSize, indexBias, mapped_indexes); - } - else { - /* no index/element buffer */ - draw_set_mapped_element_buffer(draw, 0, 0, NULL); + if (info->indexed && cell->index_buffer.buffer) { + mapped_indices = cell_resource(cell->index_buffer.buffer)->data; + mapped_indices += cell->index_buffer.offset; } + draw_set_mapped_element_buffer_range(draw, (mapped_indices) ? + lp->index_buffer.index_size : 0, + info->index_bias, + info->min_index, + info->max_index, + mapped_indices); /* draw! 
*/ - draw_arrays(draw, mode, start, count); + draw_arrays(draw, info->mode, info->start, info->count); /* * unmap vertex/index buffers - will cause draw module to flush @@ -102,7 +100,7 @@ cell_draw_range_elements(struct pipe_context *pipe, for (i = 0; i < cell->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); } - if (indexBuffer) { + if (mapped_indices) { draw_set_mapped_element_buffer(draw, 0, 0, NULL); } @@ -115,6 +113,44 @@ cell_draw_range_elements(struct pipe_context *pipe, } +static void +cell_draw_range_elements(struct pipe_context *pipe, + struct pipe_resource *indexBuffer, + unsigned indexSize, + int indexBias, + unsigned min_index, + unsigned max_index, + unsigned mode, unsigned start, unsigned count) +{ + struct cell_context *cell = cell_context(pipe); + struct pipe_draw_info info; + struct pipe_index_buffer saved_ib, ib; + + util_draw_init_info(&info); + info.mode = mode; + info.start = start; + info.count = count; + info.index_bias = indexBias; + info.min_index = min_index; + info.max_index = max_index; + + if (indexBuffer) { + info.indexed = TRUE; + saved_ib = cell->index_buffer; + + ib.buffer = indexBuffer; + ib.offset = 0; + ib.index_size = indexSize; + pipe->set_index_buffer(pipe, &ib); + } + + cell_draw_vbo(pipe, &info); + + if (indexBuffer) + pipe->set_index_buffer(pipe, &saved_ib); +} + + static void cell_draw_elements(struct pipe_context *pipe, struct pipe_resource *indexBuffer, @@ -142,5 +178,6 @@ cell_init_draw_functions(struct cell_context *cell) cell->pipe.draw_arrays = cell_draw_arrays; cell->pipe.draw_elements = cell_draw_elements; cell->pipe.draw_range_elements = cell_draw_range_elements; + cell->pipe.draw_vbo = cell_draw_vbo; } diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c index 69152b6cbf..4e3701cd0a 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -91,10 +91,26 @@ 
cell_set_vertex_buffers(struct pipe_context *pipe, } +static void +cell_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct cell_context *cell = cell_context(pipe); + + if (ib) + memcpy(&cell->index_buffer, ib, sizeof(cell->index_buffer)); + else + memset(&cell->index_buffer, 0, sizeof(cell->index_buffer)); + + /* TODO make this more like a state */ +} + + void cell_init_vertex_functions(struct cell_context *cell) { cell->pipe.set_vertex_buffers = cell_set_vertex_buffers; + cell->pipe.set_index_buffer = cell_set_index_buffer; cell->pipe.create_vertex_elements_state = cell_create_vertex_elements_state; cell->pipe.bind_vertex_elements_state = cell_bind_vertex_elements_state; cell->pipe.delete_vertex_elements_state = cell_delete_vertex_elements_state; diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index 9c9c1bdc45..1048d58313 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -28,6 +28,7 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" +#include "util/u_draw_quad.h" #include "pipe/p_context.h" #include "fo_context.h" @@ -50,13 +51,8 @@ void failover_fail_over( struct failover_context *failover ) } -static void failover_draw_elements( struct pipe_context *pipe, - struct pipe_resource *indexResource, - unsigned indexSize, - int indexBias, - unsigned prim, - unsigned start, - unsigned count) +static void failover_draw_vbo( struct pipe_context *pipe, + const struct pipe_draw_info *info) { struct failover_context *failover = failover_context( pipe ); @@ -70,13 +66,7 @@ static void failover_draw_elements( struct pipe_context *pipe, /* Try hardware: */ if (failover->mode == FO_HW) { - failover->hw->draw_elements( failover->hw, - indexResource, - indexSize, - indexBias, - prim, - start, - count ); + failover->hw->draw_vbo( failover->hw, info ); } /* Possibly try software: @@ -88,13 +78,7 @@ static void 
failover_draw_elements( struct pipe_context *pipe, failover_state_emit( failover ); } - failover->sw->draw_elements( failover->sw, - indexResource, - indexSize, - indexBias, - prim, - start, - count ); + failover->sw->draw_vbo( failover->sw, info ); /* Be ready to switch back to hardware rendering without an * intervening flush. Unlikely to be much performance impact to @@ -105,6 +89,40 @@ static void failover_draw_elements( struct pipe_context *pipe, } +static void failover_draw_elements( struct pipe_context *pipe, + struct pipe_resource *indexResource, + unsigned indexSize, + int indexBias, + unsigned prim, + unsigned start, + unsigned count) +{ + struct failover_context *failover = failover_context( pipe ); + struct pipe_draw_info info; + struct pipe_index_buffer saved_ib, ib; + + util_draw_init_info(&info); + info.mode = prim; + info.start = start; + info.count = count; + + if (indexResource) { + info.indexed = TRUE; + saved_ib = failover->index_buffer; + + ib.buffer = indexResource; + ib.offset = 0; + ib.index_size = indexSize; + pipe->set_index_buffer(pipe, &ib); + } + + failover_draw_vbo(pipe, &info); + + if (indexResource) + pipe->set_index_buffer(pipe, &saved_ib); +} + + static void failover_draw_arrays( struct pipe_context *pipe, unsigned prim, unsigned start, unsigned count) { @@ -145,6 +163,7 @@ struct pipe_context *failover_create( struct pipe_context *hw, failover->pipe.draw_arrays = failover_draw_arrays; failover->pipe.draw_elements = failover_draw_elements; + failover->pipe.draw_vbo = failover_draw_vbo; failover->pipe.clear = hw->clear; failover->pipe.clear_render_target = hw->clear_render_target; failover->pipe.clear_depth_stencil = hw->clear_depth_stencil; diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h index 9d3e0d0dba..1afa6c9cee 100644 --- a/src/gallium/drivers/failover/fo_context.h +++ b/src/gallium/drivers/failover/fo_context.h @@ -56,6 +56,7 @@ #define FO_NEW_VERTEX_BUFFER 0x40000 #define 
FO_NEW_VERTEX_ELEMENT 0x80000 #define FO_NEW_SAMPLE_MASK 0x100000 +#define FO_NEW_INDEX_BUFFER 0x200000 @@ -97,6 +98,7 @@ struct failover_context { struct pipe_scissor_state scissor; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; + struct pipe_index_buffer index_buffer; uint num_vertex_buffers; diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c index 12e42379f9..c265f381b6 100644 --- a/src/gallium/drivers/failover/fo_state.c +++ b/src/gallium/drivers/failover/fo_state.c @@ -583,6 +583,23 @@ failover_set_vertex_buffers(struct pipe_context *pipe, } +static void +failover_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct failover_context *failover = failover_context(pipe); + + if (ib) + memcpy(&failover->index_buffer, ib, sizeof(failover->index_buffer)); + else + memset(&failover->index_buffer, 0, sizeof(failover->index_buffer)); + + failover->dirty |= FO_NEW_INDEX_BUFFER; + failover->sw->set_index_buffer( failover->sw, ib ); + failover->hw->set_index_buffer( failover->hw, ib ); +} + + void failover_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, @@ -635,6 +652,7 @@ failover_init_state_functions( struct failover_context *failover ) failover->pipe.set_vertex_sampler_views = failover_set_vertex_sampler_views; failover->pipe.set_viewport_state = failover_set_viewport_state; failover->pipe.set_vertex_buffers = failover_set_vertex_buffers; + failover->pipe.set_index_buffer = failover_set_index_buffer; failover->pipe.set_constant_buffer = failover_set_constant_buffer; failover->pipe.create_sampler_view = failover_create_sampler_view; failover->pipe.sampler_view_destroy = failover_sampler_view_destroy; diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c index 147f23269c..7f434ff9d6 100644 --- a/src/gallium/drivers/failover/fo_state_emit.c +++ 
b/src/gallium/drivers/failover/fo_state_emit.c @@ -135,5 +135,10 @@ failover_state_emit( struct failover_context *failover ) failover->vertex_buffers ); } + if (failover->dirty & FO_NEW_INDEX_BUFFER) { + failover->sw->set_index_buffer( failover->sw, + &failover->index_buffer ); + } + failover->dirty = 0; } diff --git a/src/gallium/drivers/galahad/glhd_context.c b/src/gallium/drivers/galahad/glhd_context.c index ab6f17b3ab..6473f2d499 100644 --- a/src/gallium/drivers/galahad/glhd_context.c +++ b/src/gallium/drivers/galahad/glhd_context.c @@ -112,6 +112,16 @@ galahad_draw_range_elements(struct pipe_context *_pipe, count); } +static void +galahad_draw_vbo(struct pipe_context *_pipe, + const struct pipe_draw_info *info) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->draw_vbo(pipe, info); +} + static struct pipe_query * galahad_create_query(struct pipe_context *_pipe, unsigned query_type) @@ -650,6 +660,41 @@ galahad_set_vertex_buffers(struct pipe_context *_pipe, num_buffers, buffers); } + +static void +galahad_set_index_buffer(struct pipe_context *_pipe, + const struct pipe_index_buffer *_ib) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_index_buffer unwrapped_ib, *ib = NULL; + + if (_ib->buffer) { + switch (_ib->index_size) { + case 1: + case 2: + case 4: + break; + default: + glhd_warn("index buffer %p has unrecognized index size %d", + _ib->buffer, _ib->index_size); + break; + } + } + else if (_ib->offset || _ib->index_size) { + glhd_warn("non-indexed state with index offset %d and index size %d", + _ib->offset, _ib->index_size); + } + + if (_ib) { + unwrapped_ib = *_ib; + unwrapped_ib.buffer = galahad_resource_unwrap(_ib->buffer); + ib = &unwrapped_ib; + } + + pipe->set_index_buffer(pipe, ib); +} + static void galahad_resource_copy_region(struct pipe_context *_pipe, struct pipe_resource *_dst, @@ -937,6 +982,7 @@ 
galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) glhd_pipe->base.draw_arrays = galahad_draw_arrays; glhd_pipe->base.draw_elements = galahad_draw_elements; glhd_pipe->base.draw_range_elements = galahad_draw_range_elements; + glhd_pipe->base.draw_vbo = galahad_draw_vbo; glhd_pipe->base.create_query = galahad_create_query; glhd_pipe->base.destroy_query = galahad_destroy_query; glhd_pipe->base.begin_query = galahad_begin_query; @@ -976,6 +1022,7 @@ galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) glhd_pipe->base.set_fragment_sampler_views = galahad_set_fragment_sampler_views; glhd_pipe->base.set_vertex_sampler_views = galahad_set_vertex_sampler_views; glhd_pipe->base.set_vertex_buffers = galahad_set_vertex_buffers; + glhd_pipe->base.set_index_buffer = galahad_set_index_buffer; glhd_pipe->base.resource_copy_region = galahad_resource_copy_region; glhd_pipe->base.clear = galahad_clear; glhd_pipe->base.clear_render_target = galahad_clear_render_target; diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 2af9bdac95..ca07b3e235 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -36,6 +36,7 @@ #include "pipe/p_defines.h" #include "util/u_inlines.h" #include "util/u_memory.h" +#include "util/u_draw_quad.h" #include "pipe/p_screen.h" @@ -45,16 +46,11 @@ static void -i915_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned min_index, - unsigned max_index, - unsigned prim, unsigned start, unsigned count) +i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { struct i915_context *i915 = i915_context(pipe); struct draw_context *draw = i915->draw; + void *mapped_indices = NULL; unsigned i; if (i915->dirty) @@ -71,16 +67,18 @@ i915_draw_range_elements(struct pipe_context *pipe, /* * Map index buffer, if present */ - if 
(indexBuffer) { - void *mapped_indexes = i915_buffer(indexBuffer)->data; - draw_set_mapped_element_buffer_range(draw, indexSize, indexBias, - min_index, - max_index, - mapped_indexes); - } else { - draw_set_mapped_element_buffer(draw, 0, 0, NULL); + if (info->indexed && i915->index_buffer.buffer) { + mapped_indices = i915_buffer(i915->index_buffer.buffer)->data; + mapped_indices += i915->index_buffer.offset; } + draw_set_mapped_element_buffer_range(draw, (mapped_indices) ? + i915->index_buffer.index_size : 0, + info->index_bias, + info->min_index, + info->max_index, + mapped_indices); + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, i915->current.constants[PIPE_SHADER_VERTEX], @@ -90,7 +88,7 @@ i915_draw_range_elements(struct pipe_context *pipe, /* * Do the drawing */ - draw_arrays(i915->draw, prim, start, count); + draw_arrays(i915->draw, info->mode, info->start, info->count); /* * unmap vertex/index buffers @@ -99,11 +97,48 @@ i915_draw_range_elements(struct pipe_context *pipe, draw_set_mapped_vertex_buffer(draw, i, NULL); } - if (indexBuffer) { + if (mapped_indices) { draw_set_mapped_element_buffer(draw, 0, 0, NULL); } } +static void +i915_draw_range_elements(struct pipe_context *pipe, + struct pipe_resource *indexBuffer, + unsigned indexSize, + int indexBias, + unsigned min_index, + unsigned max_index, + unsigned prim, unsigned start, unsigned count) +{ + struct i915_context *i915 = i915_context(pipe); + struct pipe_draw_info info; + struct pipe_index_buffer saved_ib, ib; + + util_draw_init_info(&info); + info.mode = prim; + info.start = start; + info.count = count; + info.index_bias = indexBias; + info.min_index = min_index; + info.max_index = max_index; + + if (indexBuffer) { + info.indexed = TRUE; + saved_ib = i915->index_buffer; + + ib.buffer = indexBuffer; + ib.offset = 0; + ib.index_size = indexSize; + pipe->set_index_buffer(pipe, &ib); + } + + i915_draw_vbo(pipe, &info); + + if (indexBuffer) + pipe->set_index_buffer(pipe, &saved_ib); +} + 
static void i915_draw_elements(struct pipe_context *pipe, struct pipe_resource *indexBuffer, @@ -171,6 +206,7 @@ i915_create_context(struct pipe_screen *screen, void *priv) i915->base.draw_arrays = i915_draw_arrays; i915->base.draw_elements = i915_draw_elements; i915->base.draw_range_elements = i915_draw_range_elements; + i915->base.draw_vbo = i915_draw_vbo; /* * Create drawing context and plug our rendering stage into it. diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index b210cb130d..3ae61d0ea7 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -221,6 +221,7 @@ struct i915_context struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_index_buffer index_buffer; unsigned dirty; diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index e767aa9f8f..385c3b2d2d 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -812,6 +812,19 @@ i915_delete_vertex_elements_state(struct pipe_context *pipe, void *velems) FREE( velems ); } +static void i915_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct i915_context *i915 = i915_context(pipe); + + if (ib) + memcpy(&i915->index_buffer, ib, sizeof(i915->index_buffer)); + else + memset(&i915->index_buffer, 0, sizeof(i915->index_buffer)); + + /* TODO make this more like a state */ +} + static void i915_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) @@ -860,4 +873,5 @@ i915_init_state_functions( struct i915_context *i915 ) i915->base.sampler_view_destroy = i915_sampler_view_destroy; i915->base.set_viewport_state = i915_set_viewport_state; i915->base.set_vertex_buffers = i915_set_vertex_buffers; + i915->base.set_index_buffer = i915_set_index_buffer; } diff --git 
a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 94c9c443f0..56d351f97d 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -576,6 +576,7 @@ struct brw_context */ struct pipe_resource *index_buffer; unsigned index_size; + unsigned index_offset; /* Updates are signalled by PIPE_NEW_INDEX_RANGE: */ diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index 4625c2048f..fa7d047e0b 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -29,6 +29,7 @@ #include "util/u_inlines.h" #include "util/u_prim.h" #include "util/u_upload_mgr.h" +#include "util/u_draw_quad.h" #include "brw_draw.h" #include "brw_defines.h" @@ -142,7 +143,7 @@ static int brw_emit_prim(struct brw_context *brw, */ static int try_draw_range_elements(struct brw_context *brw, - struct pipe_resource *index_buffer, + boolean indexed, unsigned hw_prim, unsigned start, unsigned count) { @@ -165,7 +166,7 @@ try_draw_range_elements(struct brw_context *brw, if (ret) return ret; - ret = brw_emit_prim(brw, start, count, index_buffer != NULL, hw_prim); + ret = brw_emit_prim(brw, start, count, indexed, hw_prim); if (ret) return ret; @@ -177,61 +178,86 @@ try_draw_range_elements(struct brw_context *brw, static void -brw_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *index_buffer, - unsigned index_size, int index_bias, - unsigned min_index, - unsigned max_index, - unsigned mode, unsigned start, unsigned count) +brw_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { struct brw_context *brw = brw_context(pipe); int ret; uint32_t hw_prim; - hw_prim = brw_set_prim(brw, mode); + hw_prim = brw_set_prim(brw, info->mode); if (BRW_DEBUG & DEBUG_PRIMS) debug_printf("PRIM: %s start %d count %d index_buffer %p\n", - u_prim_name(mode), start, count, (void *)index_buffer); - - assert(index_bias == 0); + u_prim_name(info->mode), 
info->start, info->count, + (void *) brw->curr.index_buffer); - /* Potentially trigger upload of new index buffer. - * - * XXX: do we need to go through state validation to achieve this? - * Could just call upload code directly. - */ - if (brw->curr.index_buffer != index_buffer || - brw->curr.index_size != index_size) { - pipe_resource_reference( &brw->curr.index_buffer, index_buffer ); - brw->curr.index_size = index_size; - brw->state.dirty.mesa |= PIPE_NEW_INDEX_BUFFER; - } + assert(info->index_bias == 0); - /* XXX: do we really care? + /* Potentially trigger upload of new index buffer range. + * XXX: do we really care? */ - if (brw->curr.min_index != min_index || - brw->curr.max_index != max_index) + if (brw->curr.min_index != info->min_index || + brw->curr.max_index != info->max_index) { - brw->curr.min_index = min_index; - brw->curr.max_index = max_index; + brw->curr.min_index = info->min_index; + brw->curr.max_index = info->max_index; brw->state.dirty.mesa |= PIPE_NEW_INDEX_RANGE; } /* Make a first attempt at drawing: */ - ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count ); + ret = try_draw_range_elements(brw, info->indexed, + hw_prim, info->start, info->count); /* Otherwise, flush and retry: */ if (ret != 0) { brw_context_flush( brw ); - ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count ); + ret = try_draw_range_elements(brw, info->indexed, + hw_prim, info->start, info->count); assert(ret == 0); } } +static void +brw_draw_range_elements(struct pipe_context *pipe, + struct pipe_resource *index_buffer, + unsigned index_size, int index_bias, + unsigned min_index, + unsigned max_index, + unsigned mode, unsigned start, unsigned count) +{ + struct brw_context *brw = brw_context(pipe); + struct pipe_draw_info info; + struct pipe_index_buffer saved_ib, ib; + + util_draw_init_info(&info); + info.mode = mode; + info.start = start; + info.count = count; + info.index_bias = index_bias; + info.min_index = min_index; + 
info.max_index = max_index; + + if (index_buffer) { + info.indexed = TRUE; + saved_ib.buffer = brw->curr.index_buffer; + saved_ib.offset = brw->curr.index_offset; + saved_ib.index_size = brw->curr.index_size; + + ib.buffer = index_buffer; + ib.offset = 0; + ib.index_size = index_size; + pipe->set_index_buffer(pipe, &ib); + } + + brw_draw_vbo(pipe, &info); + + if (index_buffer) + pipe->set_index_buffer(pipe, &saved_ib); +} + static void brw_draw_elements(struct pipe_context *pipe, struct pipe_resource *index_buffer, @@ -262,6 +288,7 @@ boolean brw_draw_init( struct brw_context *brw ) brw->base.draw_arrays = brw_draw_arrays; brw->base.draw_elements = brw_draw_elements; brw->base.draw_range_elements = brw_draw_range_elements; + brw->base.draw_vbo = brw_draw_vbo; /* Create helpers for uploading data in user buffers: */ diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index 337eee8cd9..ebeb1e146a 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -231,7 +231,7 @@ static int brw_prepare_indices(struct brw_context *brw) struct pipe_resource *upload_buf = NULL; struct brw_winsys_buffer *bo = NULL; GLuint offset; - GLuint index_size; + GLuint index_size, index_offset; GLuint ib_size; int ret; @@ -246,13 +246,14 @@ static int brw_prepare_indices(struct brw_context *brw) ib_size = index_buffer->width0; index_size = brw->curr.index_size; + index_offset = brw->curr.index_offset; /* Turn userbuffer into a proper hardware buffer? 
*/ if (brw_buffer_is_user_buffer(index_buffer)) { ret = u_upload_buffer( brw->vb.upload_index, - 0, + index_offset, ib_size, index_buffer, &offset, @@ -269,7 +270,7 @@ static int brw_prepare_indices(struct brw_context *brw) else { bo = brw_buffer(index_buffer)->bo; ib_size = bo->size; - offset = 0; + offset = index_offset; } /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading the diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c index 4a120a51da..007239efc4 100644 --- a/src/gallium/drivers/i965/brw_pipe_vertex.c +++ b/src/gallium/drivers/i965/brw_pipe_vertex.c @@ -274,10 +274,41 @@ static void brw_set_vertex_buffers(struct pipe_context *pipe, } +static void brw_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct brw_context *brw = brw_context(pipe); + + if (ib) { + if (brw->curr.index_buffer == ib->buffer && + brw->curr.index_offset == ib->offset && + brw->curr.index_size == ib->index_size) + return; + + pipe_resource_reference(&brw->curr.index_buffer, ib->buffer); + brw->curr.index_offset = ib->offset; + brw->curr.index_size = ib->index_size; + } + else { + if (!brw->curr.index_buffer && + !brw->curr.index_offset && + !brw->curr.index_size) + return; + + pipe_resource_reference(&brw->curr.index_buffer, NULL); + brw->curr.index_offset = 0; + brw->curr.index_size = 0; + } + + brw->state.dirty.mesa |= PIPE_NEW_INDEX_BUFFER; +} + + void brw_pipe_vertex_init( struct brw_context *brw ) { brw->base.set_vertex_buffers = brw_set_vertex_buffers; + brw->base.set_index_buffer = brw_set_index_buffer; brw->base.create_vertex_elements_state = brw_create_vertex_elements_state; brw->base.bind_vertex_elements_state = brw_bind_vertex_elements_state; brw->base.delete_vertex_elements_state = brw_delete_vertex_elements_state; diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index 67be895b38..e10d3a1413 100644 --- 
a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -110,6 +110,16 @@ identity_draw_range_elements(struct pipe_context *_pipe, count); } +static void +identity_draw_vbo(struct pipe_context *_pipe, + const struct pipe_draw_info *info) +{ + struct identity_context *id_pipe = identity_context(_pipe); + struct pipe_context *pipe = id_pipe->pipe; + + pipe->draw_vbo(pipe, info); +} + static struct pipe_query * identity_create_query(struct pipe_context *_pipe, unsigned query_type) @@ -611,6 +621,24 @@ identity_set_vertex_buffers(struct pipe_context *_pipe, num_buffers, buffers); } + +static void +identity_set_index_buffer(struct pipe_context *_pipe, + const struct pipe_index_buffer *_ib) +{ + struct identity_context *id_pipe = identity_context(_pipe); + struct pipe_context *pipe = id_pipe->pipe; + struct pipe_index_buffer unwrapped_ib, *ib = NULL; + + if (_ib) { + unwrapped_ib = *_ib; + unwrapped_ib.buffer = identity_resource_unwrap(_ib->buffer); + ib = &unwrapped_ib; + } + + pipe->set_index_buffer(pipe, ib); +} + static void identity_resource_copy_region(struct pipe_context *_pipe, struct pipe_resource *_dst, @@ -892,6 +920,7 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.draw_arrays = identity_draw_arrays; id_pipe->base.draw_elements = identity_draw_elements; id_pipe->base.draw_range_elements = identity_draw_range_elements; + id_pipe->base.draw_vbo = identity_draw_vbo; id_pipe->base.create_query = identity_create_query; id_pipe->base.destroy_query = identity_destroy_query; id_pipe->base.begin_query = identity_begin_query; @@ -931,6 +960,7 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.set_fragment_sampler_views = identity_set_fragment_sampler_views; id_pipe->base.set_vertex_sampler_views = identity_set_vertex_sampler_views; id_pipe->base.set_vertex_buffers = identity_set_vertex_buffers; + id_pipe->base.set_index_buffer = 
identity_set_index_buffer; id_pipe->base.resource_copy_region = identity_resource_copy_region; id_pipe->base.clear = identity_clear; id_pipe->base.clear_render_target = identity_clear_render_target; diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index b2643ab33c..50f9091c3c 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -77,6 +77,7 @@ struct llvmpipe_context { struct pipe_sampler_view *vertex_sampler_views[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_index_buffer index_buffer; struct { struct llvmpipe_resource *buffer[PIPE_MAX_SO_BUFFERS]; int offset[PIPE_MAX_SO_BUFFERS]; diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index 625d0c8a8c..b6dbb9d288 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -34,6 +34,7 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" #include "util/u_prim.h" +#include "util/u_draw_quad.h" #include "lp_context.h" #include "lp_state.h" @@ -49,20 +50,11 @@ * the drawing to the 'draw' module. 
*/ static void -llvmpipe_draw_range_elements_instanced(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount) +llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { struct llvmpipe_context *lp = llvmpipe_context(pipe); struct draw_context *draw = lp->draw; + void *mapped_indices = NULL; unsigned i; if (lp->dirty) @@ -77,27 +69,25 @@ llvmpipe_draw_range_elements_instanced(struct pipe_context *pipe, } /* Map index buffer, if present */ - if (indexBuffer) { - void *mapped_indexes = llvmpipe_resource_data(indexBuffer); - draw_set_mapped_element_buffer_range(draw, - indexSize, - indexBias, - minIndex, - maxIndex, - mapped_indexes); - } - else { - /* no index/element buffer */ - draw_set_mapped_element_buffer_range(draw, 0, 0, start, - start + count - 1, NULL); + if (info->indexed && lp->index_buffer.buffer) { + mapped_indices = llvmpipe_resource_data(lp->index_buffer.buffer); + mapped_indices += lp->index_buffer.offset; } + + draw_set_mapped_element_buffer_range(draw, (mapped_indices) ? + lp->index_buffer.index_size : 0, + info->index_bias, + info->min_index, + info->max_index, + mapped_indices); + llvmpipe_prepare_vertex_sampling(lp, lp->num_vertex_sampler_views, lp->vertex_sampler_views); /* draw! 
*/ - draw_arrays_instanced(draw, mode, start, count, - startInstance, instanceCount); + draw_arrays_instanced(draw, info->mode, info->start, info->count, + info->start_instance, info->instance_count); /* * unmap vertex/index buffers @@ -105,7 +95,7 @@ llvmpipe_draw_range_elements_instanced(struct pipe_context *pipe, for (i = 0; i < lp->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); } - if (indexBuffer) { + if (mapped_indices) { draw_set_mapped_element_buffer(draw, 0, 0, NULL); } llvmpipe_cleanup_vertex_sampling(lp); @@ -119,6 +109,50 @@ llvmpipe_draw_range_elements_instanced(struct pipe_context *pipe, } +static void +llvmpipe_draw_range_elements_instanced(struct pipe_context *pipe, + struct pipe_resource *indexBuffer, + unsigned indexSize, + int indexBias, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + struct llvmpipe_context *lp = llvmpipe_context(pipe); + struct pipe_draw_info info; + struct pipe_index_buffer saved_ib, ib; + + util_draw_init_info(&info); + info.mode = mode; + info.start = start; + info.count = count; + info.start_instance = startInstance; + info.instance_count = instanceCount; + + info.index_bias = indexBias; + info.min_index = minIndex; + info.max_index = maxIndex; + + if (indexBuffer) { + info.indexed = TRUE; + saved_ib = lp->index_buffer; + + ib.buffer = indexBuffer; + ib.offset = 0; + ib.index_size = indexSize; + pipe->set_index_buffer(pipe, &ib); + } + + llvmpipe_draw_vbo(pipe, &info); + + if (indexBuffer) + pipe->set_index_buffer(pipe, &saved_ib); +} + static void llvmpipe_draw_arrays_instanced(struct pipe_context *pipe, unsigned mode, @@ -227,4 +261,6 @@ llvmpipe_init_draw_funcs(struct llvmpipe_context *llvmpipe) llvmpipe->pipe.draw_range_elements = llvmpipe_draw_range_elements; llvmpipe->pipe.draw_arrays_instanced = llvmpipe_draw_arrays_instanced; llvmpipe->pipe.draw_elements_instanced = 
llvmpipe_draw_elements_instanced; + + llvmpipe->pipe.draw_vbo = llvmpipe_draw_vbo; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c index 113f13db01..d86e66b4fb 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c @@ -89,6 +89,19 @@ llvmpipe_set_vertex_buffers(struct pipe_context *pipe, } +static void +llvmpipe_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + + if (ib) + memcpy(&llvmpipe->index_buffer, ib, sizeof(llvmpipe->index_buffer)); + else + memset(&llvmpipe->index_buffer, 0, sizeof(llvmpipe->index_buffer)); + + /* TODO make this more like a state */ +} void llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe) @@ -98,4 +111,5 @@ llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe) llvmpipe->pipe.delete_vertex_elements_state = llvmpipe_delete_vertex_elements_state; llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers; + llvmpipe->pipe.set_index_buffer = llvmpipe_set_index_buffer; } diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 915a925402..3fc39c1137 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -86,6 +86,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv) nv50->pipe.draw_arrays_instanced = nv50_draw_arrays_instanced; nv50->pipe.draw_elements = nv50_draw_elements; nv50->pipe.draw_elements_instanced = nv50_draw_elements_instanced; + nv50->pipe.draw_vbo = nv50_draw_vbo; nv50->pipe.clear = nv50_clear; nv50->pipe.flush = nv50_flush; diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 12c4a93a9b..a7c2b5d487 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -148,6 +148,7 @@ struct nv50_context { struct 
pipe_resource *constbuf[PIPE_SHADER_TYPES]; struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; unsigned vtxbuf_nr; + struct pipe_index_buffer idxbuf; struct nv50_vtxelt_stateobj *vtxelt; struct nv50_sampler_stateobj *sampler[3][PIPE_MAX_SAMPLERS]; unsigned sampler_nr[3]; @@ -197,6 +198,8 @@ extern void nv50_draw_elements_instanced(struct pipe_context *pipe, unsigned count, unsigned startInstance, unsigned instanceCount); +extern void nv50_draw_vbo(struct pipe_context *pipe, + const struct pipe_draw_info *info); extern void nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso); extern struct nouveau_stateobj *nv50_vbo_validate(struct nv50_context *nv50); diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 42c5a58318..ec0c0ff283 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -742,6 +742,20 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count, nv50->dirty |= NV50_NEW_ARRAYS; } +static void +nv50_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + if (ib) + memcpy(&nv50->idxbuf, ib, sizeof(nv50->idxbuf)); + else + memset(&nv50->idxbuf, 0, sizeof(nv50->idxbuf)); + + /* TODO make this more like a state */ +} + static void * nv50_vtxelts_state_create(struct pipe_context *pipe, unsigned num_elements, @@ -827,5 +841,6 @@ nv50_init_state_functions(struct nv50_context *nv50) nv50->pipe.bind_vertex_elements_state = nv50_vtxelts_state_bind; nv50->pipe.set_vertex_buffers = nv50_set_vertex_buffers; + nv50->pipe.set_index_buffer = nv50_set_index_buffer; } diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 864cb09352..11ffc182c2 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -473,6 +473,37 @@ nv50_draw_elements(struct pipe_context *pipe, mode, start, count, 0, 1); } +void +nv50_draw_vbo(struct 
pipe_context *pipe, const struct pipe_draw_info *info) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + if (info->indexed && nv50->idxbuf.buffer) { + unsigned offset; + + assert(nv50->idxbuf.offset % nv50->idxbuf.index_size == 0); + offset = nv50->idxbuf.offset / nv50->idxbuf.index_size; + + nv50_draw_elements_instanced(pipe, + nv50->idxbuf.buffer, + nv50->idxbuf.index_size, + info->index_bias, + info->mode, + info->start + offset, + info->count, + info->start_instance, + info->instance_count); + } + else { + nv50_draw_arrays_instanced(pipe, + info->mode, + info->start, + info->count, + info->start_instance, + info->instance_count); + } +} + static INLINE boolean nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib, struct nouveau_stateobj **pso, diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c index 6d2dc4d5bf..f30795f69a 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.c +++ b/src/gallium/drivers/nvfx/nvfx_context.c @@ -57,6 +57,7 @@ nvfx_create(struct pipe_screen *pscreen, void *priv) nvfx->pipe.destroy = nvfx_destroy; nvfx->pipe.draw_arrays = nvfx_draw_arrays; nvfx->pipe.draw_elements = nvfx_draw_elements; + nvfx->pipe.draw_vbo = nvfx_draw_vbo; nvfx->pipe.clear = nvfx_clear; nvfx->pipe.flush = nvfx_flush; diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index e48f9f3aa8..d6cd272eed 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -121,7 +121,8 @@ struct nvfx_context { struct pipe_stencil_ref stencil_ref; struct pipe_viewport_state viewport; struct pipe_framebuffer_state framebuffer; - struct pipe_resource *idxbuf; + struct pipe_index_buffer idxbuf; + struct pipe_resource *idxbuf_buffer; unsigned idxbuf_format; struct nvfx_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS]; @@ -242,6 +243,8 @@ extern void nvfx_draw_elements(struct 
pipe_context *pipe, unsigned indexSize, int indexBias, unsigned mode, unsigned start, unsigned count); +extern void nvfx_draw_vbo(struct pipe_context *pipe, + const struct pipe_draw_info *info); /* nvfx_vertprog.c */ extern boolean nvfx_vertprog_validate(struct nvfx_context *nvfx); diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c index 30322d46d9..cd58e439d7 100644 --- a/src/gallium/drivers/nvfx/nvfx_state.c +++ b/src/gallium/drivers/nvfx/nvfx_state.c @@ -555,6 +555,20 @@ nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count, nvfx->draw_dirty |= NVFX_NEW_ARRAYS; } +static void +nvfx_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + if (ib) + memcpy(&nvfx->idxbuf, ib, sizeof(nvfx->idxbuf)); + else + memset(&nvfx->idxbuf, 0, sizeof(nvfx->idxbuf)); + + /* TODO make this more like a state */ +} + static void * nvfx_vtxelts_state_create(struct pipe_context *pipe, unsigned num_elements, @@ -635,4 +649,5 @@ nvfx_init_state_functions(struct nvfx_context *nvfx) nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind; nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers; + nvfx->pipe.set_index_buffer = nvfx_set_index_buffer; } diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c index 520bae5aed..23a59b589b 100644 --- a/src/gallium/drivers/nvfx/nvfx_vbo.c +++ b/src/gallium/drivers/nvfx/nvfx_vbo.c @@ -85,7 +85,7 @@ nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_resource *ib, unsigned type; if (!ib) { - nvfx->idxbuf = NULL; + nvfx->idxbuf_buffer = NULL; nvfx->idxbuf_format = 0xdeadbeef; return FALSE; } @@ -104,10 +104,10 @@ nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_resource *ib, return FALSE; } - if (ib != nvfx->idxbuf || + if (ib != nvfx->idxbuf_buffer || type != nvfx->idxbuf_format) { nvfx->dirty |= NVFX_NEW_ARRAYS; - nvfx->idxbuf = ib; + nvfx->idxbuf_buffer = 
ib; nvfx->idxbuf_format = type; } @@ -491,11 +491,38 @@ nvfx_draw_elements(struct pipe_context *pipe, pipe->flush(pipe, 0, NULL); } +void +nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + if (info->indexed && nvfx->idxbuf.buffer) { + unsigned offset; + + assert(nvfx->idxbuf.offset % nvfx->idxbuf.index_size == 0); + offset = nvfx->idxbuf.offset / nvfx->idxbuf.index_size; + + nvfx_draw_elements(pipe, + nvfx->idxbuf.buffer, + nvfx->idxbuf.index_size, + info->index_bias, + info->mode, + info->start + offset, + info->count); + } + else { + nvfx_draw_arrays(pipe, + info->mode, + info->start, + info->count); + } +} + boolean nvfx_vbo_validate(struct nvfx_context *nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; - struct pipe_resource *ib = nvfx->idxbuf; + struct pipe_resource *ib = nvfx->idxbuf_buffer; unsigned ib_format = nvfx->idxbuf_format; int i; int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr); @@ -610,10 +637,10 @@ nvfx_vbo_relocate(struct nvfx_context *nvfx) } } - if(nvfx->idxbuf) + if(nvfx->idxbuf_buffer) { unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY; - struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf)->bo; + struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf_buffer)->bo; assert(nvfx->screen->index_buffer_reloc_flags); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index b4256c6278..7c77a46016 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -524,6 +524,8 @@ struct r300_context { struct r300_vertex_element_state *velems; bool any_user_vbs; + struct pipe_index_buffer index_buffer; + /* Vertex info for Draw. 
*/ struct vertex_info vertex_info; diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index bae02135da..da96098cc4 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -33,6 +33,7 @@ #include "util/u_memory.h" #include "util/u_upload_mgr.h" #include "util/u_prim.h" +#include "util/u_draw_quad.h" #include "r300_cs.h" #include "r300_context.h" @@ -638,26 +639,56 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, } } +static void r300_draw_vbo(struct pipe_context* pipe, + const struct pipe_draw_info *info) +{ + struct r300_context* r300 = r300_context(pipe); + + if (info->indexed && r300->index_buffer.buffer) { + unsigned offset; + + assert(r300->index_buffer.offset % r300->index_buffer.index_size == 0); + offset = r300->index_buffer.offset / r300->index_buffer.index_size; + + r300_draw_range_elements(pipe, + r300->index_buffer.buffer, + r300->index_buffer.index_size, + info->index_bias, + info->min_index, + info->max_index, + info->mode, + info->start + offset, + info->count); + } + else { + r300_draw_arrays(pipe, + info->mode, + info->start, + info->count); + } +} + /**************************************************************************** * The rest of this file is for SW TCL rendering only. Please be polite and * * keep these functions separated so that they are easier to locate. ~C. * ***************************************************************************/ -/* SW TCL arrays, using Draw. */ -static void r300_swtcl_draw_arrays(struct pipe_context* pipe, - unsigned mode, - unsigned start, - unsigned count) +/* SW TCL elements, using Draw. 
*/ +static void r300_swtcl_draw_vbo(struct pipe_context* pipe, + const struct pipe_draw_info *info) { struct r300_context* r300 = r300_context(pipe); struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS]; + struct pipe_transfer *ib_transfer; + unsigned count = info->count; int i; + void* indices = NULL; if (r300->skip_rendering) { return; } - if (!u_trim_pipe_prim(mode, &count)) { + if (!u_trim_pipe_prim(info->mode, &count)) { return; } @@ -667,13 +698,25 @@ static void r300_swtcl_draw_arrays(struct pipe_context* pipe, void* buf = pipe_buffer_map(pipe, r300->vertex_buffer[i].buffer, PIPE_TRANSFER_READ, - &vb_transfer[i]); + &vb_transfer[i]); draw_set_mapped_vertex_buffer(r300->draw, i, buf); } - draw_set_mapped_element_buffer(r300->draw, 0, 0, NULL); + if (info->indexed && r300->index_buffer.buffer) { + indices = pipe_buffer_map(pipe, r300->index_buffer.buffer, + PIPE_TRANSFER_READ, &ib_transfer); + if (indices) + indices += r300->index_buffer.offset; + } + + draw_set_mapped_element_buffer_range(r300->draw, (indices) ? + r300->index_buffer.index_size : 0, + info->index_bias, + info->min_index, + info->max_index, + indices); - draw_arrays(r300->draw, mode, start, count); + draw_arrays(r300->draw, info->mode, info->start, count); /* XXX Not sure whether this is the best fix. * It prevents CS from being rejected and weird assertion failures. */ @@ -681,9 +724,15 @@ static void r300_swtcl_draw_arrays(struct pipe_context* pipe, for (i = 0; i < r300->vertex_buffer_count; i++) { pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, - vb_transfer[i]); + vb_transfer[i]); draw_set_mapped_vertex_buffer(r300->draw, i, NULL); } + + if (ib_transfer) { + pipe_buffer_unmap(pipe, r300->index_buffer.buffer, ib_transfer); + draw_set_mapped_element_buffer_range(r300->draw, 0, 0, info->start, + info->start + count - 1, NULL); + } } /* SW TCL elements, using Draw. 
*/ @@ -698,51 +747,40 @@ static void r300_swtcl_draw_range_elements(struct pipe_context* pipe, unsigned count) { struct r300_context* r300 = r300_context(pipe); - struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS]; - struct pipe_transfer *ib_transfer; - int i; - void* indices; - - if (r300->skip_rendering) { - return; - } - - if (!u_trim_pipe_prim(mode, &count)) { - return; + struct pipe_draw_info info; + struct pipe_index_buffer saved_ib, ib; + + util_draw_init_info(&info); + info.mode = mode; + info.start = start; + info.count = count; + info.index_bias = indexBias; + info.min_index = minIndex; + info.max_index = maxIndex; + + if (indexBuffer) { + info.indexed = TRUE; + + saved_ib = r300->index_buffer; + ib.buffer = indexBuffer; + ib.offset = 0; + ib.index_size = indexSize; + pipe->set_index_buffer(pipe, &ib); } - r300_update_derived_state(r300); - - for (i = 0; i < r300->vertex_buffer_count; i++) { - void* buf = pipe_buffer_map(pipe, - r300->vertex_buffer[i].buffer, - PIPE_TRANSFER_READ, - &vb_transfer[i]); - draw_set_mapped_vertex_buffer(r300->draw, i, buf); - } - - indices = pipe_buffer_map(pipe, indexBuffer, - PIPE_TRANSFER_READ, &ib_transfer); - draw_set_mapped_element_buffer_range(r300->draw, indexSize, indexBias, - minIndex, maxIndex, indices); - - draw_arrays(r300->draw, mode, start, count); - - /* XXX Not sure whether this is the best fix. - * It prevents CS from being rejected and weird assertion failures. 
*/ - draw_flush(r300->draw); + r300_swtcl_draw_vbo(pipe, &info); - for (i = 0; i < r300->vertex_buffer_count; i++) { - pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, - vb_transfer[i]); - draw_set_mapped_vertex_buffer(r300->draw, i, NULL); - } + if (indexBuffer) + pipe->set_index_buffer(pipe, &saved_ib); +} - pipe_buffer_unmap(pipe, indexBuffer, - ib_transfer); - draw_set_mapped_element_buffer_range(r300->draw, 0, 0, - start, start + count - 1, - NULL); +static void r300_swtcl_draw_arrays(struct pipe_context* pipe, + unsigned mode, + unsigned start, + unsigned count) +{ + r300_swtcl_draw_range_elements(pipe, NULL, 0, 0, + start, start + count -1, mode, start, count); } /* Object for rendering using Draw. */ @@ -1148,9 +1186,11 @@ void r300_init_render_functions(struct r300_context *r300) if (r300->screen->caps.has_tcl) { r300->context.draw_arrays = r300_draw_arrays; r300->context.draw_range_elements = r300_draw_range_elements; + r300->context.draw_vbo = r300_draw_vbo; } else { r300->context.draw_arrays = r300_swtcl_draw_arrays; r300->context.draw_range_elements = r300_swtcl_draw_range_elements; + r300->context.draw_vbo = r300_swtcl_draw_vbo; } r300->context.resource_resolve = r300_resource_resolve; diff --git a/src/gallium/drivers/r300/r300_render_stencilref.c b/src/gallium/drivers/r300/r300_render_stencilref.c index 9a6b4e12ff..6d801cf159 100644 --- a/src/gallium/drivers/r300/r300_render_stencilref.c +++ b/src/gallium/drivers/r300/r300_render_stencilref.c @@ -42,6 +42,9 @@ struct r300_stencilref_context { unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex, unsigned mode, unsigned start, unsigned count); + void (*draw_vbo)(struct pipe_context *pipe, + const struct pipe_draw_info *info); + uint32_t rs_cull_mode; uint32_t zb_stencilrefmask; ubyte ref_value_front; @@ -144,6 +147,23 @@ static void r300_stencilref_draw_range_elements( } } +static void r300_stencilref_draw_vbo(struct pipe_context *pipe, + const struct pipe_draw_info *info) +{ 
+ struct r300_context *r300 = r300_context(pipe); + struct r300_stencilref_context *sr = r300->stencilref_fallback; + + if (!r300_stencilref_needed(r300)) { + sr->draw_vbo(pipe, info); + } else { + r300_stencilref_begin(r300); + sr->draw_vbo(pipe, info); + r300_stencilref_switch_side(r300); + sr->draw_vbo(pipe, info); + r300_stencilref_end(r300); + } +} + void r300_plug_in_stencil_ref_fallback(struct r300_context *r300) { r300->stencilref_fallback = CALLOC_STRUCT(r300_stencilref_context); @@ -151,8 +171,10 @@ void r300_plug_in_stencil_ref_fallback(struct r300_context *r300) /* Save original draw functions. */ r300->stencilref_fallback->draw_arrays = r300->context.draw_arrays; r300->stencilref_fallback->draw_range_elements = r300->context.draw_range_elements; + r300->stencilref_fallback->draw_vbo = r300->context.draw_vbo; /* Override the draw functions. */ r300->context.draw_arrays = r300_stencilref_draw_arrays; r300->context.draw_range_elements = r300_stencilref_draw_range_elements; + r300->context.draw_vbo = r300_stencilref_draw_vbo; } diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 3e221f2e02..bccd7d7859 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1505,6 +1505,23 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, r300->vertex_buffer_count = count; } +static void r300_set_index_buffer(struct pipe_context* pipe, + const struct pipe_index_buffer *ib) +{ + struct r300_context* r300 = r300_context(pipe); + + if (ib) { + pipe_resource_reference(&r300->index_buffer.buffer, ib->buffer); + memcpy(&r300->index_buffer, ib, sizeof(r300->index_buffer)); + } + else { + pipe_resource_reference(&r300->index_buffer.buffer, NULL); + memset(&r300->index_buffer, 0, sizeof(r300->index_buffer)); + } + + /* TODO make this more like a state */ +} + /* Initialize the PSC tables. 
*/ static void r300_vertex_psc(struct r300_vertex_element_state *velems) { @@ -1852,6 +1869,7 @@ void r300_init_state_functions(struct r300_context* r300) r300->context.set_viewport_state = r300_set_viewport_state; r300->context.set_vertex_buffers = r300_set_vertex_buffers; + r300->context.set_index_buffer = r300_set_index_buffer; r300->context.create_vertex_elements_state = r300_create_vertex_elements_state; r300->context.bind_vertex_elements_state = r300_bind_vertex_elements_state; diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index 4c7b67ea52..2c2bd4672b 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -316,6 +316,7 @@ struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) rctx->context.draw_arrays = r600_draw_arrays; rctx->context.draw_elements = r600_draw_elements; rctx->context.draw_range_elements = r600_draw_range_elements; + rctx->context.draw_vbo = r600_draw_vbo; rctx->context.flush = r600_flush; /* Easy accessing of screen/winsys. 
*/ diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index 1f03b202ee..9427c19d05 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -157,6 +157,7 @@ struct r600_context { struct r600_context_state *vs_sampler_view[PIPE_MAX_ATTRIBS]; struct r600_vertex_element *vertex_elements; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_index_buffer index_buffer; }; #if 0 @@ -201,6 +202,8 @@ void r600_draw_range_elements(struct pipe_context *ctx, unsigned index_size, int index_bias, unsigned min_index, unsigned max_index, unsigned mode, unsigned start, unsigned count); +void r600_draw_vbo(struct pipe_context *ctx, + const struct pipe_draw_info *info); void r600_init_blit_functions(struct r600_context *rctx); void r600_init_state_functions(struct r600_context *rctx); diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c index b248beaf8c..eeaa677edb 100644 --- a/src/gallium/drivers/r600/r600_draw.c +++ b/src/gallium/drivers/r600/r600_draw.c @@ -225,3 +225,30 @@ void r600_draw_arrays(struct pipe_context *ctx, unsigned mode, draw.index_buffer = NULL; r600_draw_common(&draw); } + +void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_draw draw; + + assert(info->index_bias == 0); + + draw.ctx = ctx; + draw.mode = info->mode; + draw.start = info->start; + draw.count = info->count; + if (info->indexed && rctx->index_buffer.buffer) { + draw.index_size = rctx->index_buffer.index_size; + draw.index_buffer = rctx->index_buffer.buffer; + + assert(rctx->index_buffer.offset % + rctx->index_buffer.index_size == 0); + draw.start += rctx->index_buffer.offset / + rctx->index_buffer.index_size; + } + else { + draw.index_size = 0; + draw.index_buffer = NULL; + } + r600_draw_common(&draw); +} diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c index ff5df855c6..57879e8d8b 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -404,6 +404,23 @@ static void r600_set_vertex_buffers(struct pipe_context *ctx, rctx->nvertex_buffer = count; } +static void r600_set_index_buffer(struct pipe_context *ctx, + const struct pipe_index_buffer *ib) +{ + struct r600_context *rctx = r600_context(ctx); + + if (ib) { + pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer); + memcpy(&rctx->index_buffer, ib, sizeof(rctx->index_buffer)); + } + else { + pipe_resource_reference(&rctx->index_buffer.buffer, NULL); + memset(&rctx->index_buffer, 0, sizeof(rctx->index_buffer)); + } + + /* TODO make this more like a state */ +} + static void r600_set_viewport_state(struct pipe_context *ctx, const struct pipe_viewport_state *state) { @@ -449,6 +466,7 @@ void r600_init_state_functions(struct r600_context *rctx) rctx->context.set_scissor_state = r600_set_scissor_state; rctx->context.set_stencil_ref = r600_set_stencil_ref; rctx->context.set_vertex_buffers = r600_set_vertex_buffers; + rctx->context.set_index_buffer = r600_set_index_buffer; rctx->context.set_vertex_sampler_views = r600_set_vs_sampler_view; rctx->context.set_viewport_state = r600_set_viewport_state; rctx->context.sampler_view_destroy = r600_sampler_view_destroy; diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c index e0dd5cf8c2..c748073b2a 100644 --- a/src/gallium/drivers/rbug/rbug_context.c +++ b/src/gallium/drivers/rbug/rbug_context.c @@ -185,6 +185,21 @@ rbug_draw_range_elements(struct pipe_context *_pipe, pipe_mutex_unlock(rb_pipe->draw_mutex); } +static void +rbug_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info) +{ + struct rbug_context *rb_pipe = rbug_context(_pipe); + struct pipe_context *pipe = rb_pipe->pipe; + + pipe_mutex_lock(rb_pipe->draw_mutex); + rbug_draw_block_locked(rb_pipe, RBUG_BLOCK_BEFORE); + + 
pipe->draw_vbo(pipe, info); + + rbug_draw_block_locked(rb_pipe, RBUG_BLOCK_AFTER); + pipe_mutex_unlock(rb_pipe->draw_mutex); +} + static struct pipe_query * rbug_create_query(struct pipe_context *_pipe, unsigned query_type) @@ -744,6 +759,23 @@ rbug_set_vertex_buffers(struct pipe_context *_pipe, buffers); } +static void +rbug_set_index_buffer(struct pipe_context *_pipe, + const struct pipe_index_buffer *_ib) +{ + struct rbug_context *rb_pipe = rbug_context(_pipe); + struct pipe_context *pipe = rb_pipe->pipe; + struct pipe_index_buffer unwrapped_ib, *ib = NULL; + + if (_ib) { + unwrapped_ib = *_ib; + unwrapped_ib.buffer = rbug_resource_unwrap(_ib->buffer); + ib = &unwrapped_ib; + } + + pipe->set_index_buffer(pipe, ib); +} + static void rbug_set_sample_mask(struct pipe_context *_pipe, unsigned sample_mask) @@ -1043,6 +1075,7 @@ rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) rb_pipe->base.draw_arrays = rbug_draw_arrays; rb_pipe->base.draw_elements = rbug_draw_elements; rb_pipe->base.draw_range_elements = rbug_draw_range_elements; + rb_pipe->base.draw_vbo = rbug_draw_vbo; rb_pipe->base.create_query = rbug_create_query; rb_pipe->base.destroy_query = rbug_destroy_query; rb_pipe->base.begin_query = rbug_begin_query; @@ -1084,6 +1117,7 @@ rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) rb_pipe->base.set_fragment_sampler_views = rbug_set_fragment_sampler_views; rb_pipe->base.set_vertex_sampler_views = rbug_set_vertex_sampler_views; rb_pipe->base.set_vertex_buffers = rbug_set_vertex_buffers; + rb_pipe->base.set_index_buffer = rbug_set_index_buffer; rb_pipe->base.set_sample_mask = rbug_set_sample_mask; rb_pipe->base.resource_copy_region = rbug_resource_copy_region; rb_pipe->base.clear = rbug_clear; diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 12ef98aac7..fa1fae6f00 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ 
b/src/gallium/drivers/softpipe/sp_context.c @@ -282,12 +282,14 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->pipe.set_viewport_state = softpipe_set_viewport_state; softpipe->pipe.set_stream_output_buffers = softpipe_set_stream_output_buffers; softpipe->pipe.set_vertex_buffers = softpipe_set_vertex_buffers; + softpipe->pipe.set_index_buffer = softpipe_set_index_buffer; softpipe->pipe.draw_arrays = softpipe_draw_arrays; softpipe->pipe.draw_elements = softpipe_draw_elements; softpipe->pipe.draw_range_elements = softpipe_draw_range_elements; softpipe->pipe.draw_arrays_instanced = softpipe_draw_arrays_instanced; softpipe->pipe.draw_elements_instanced = softpipe_draw_elements_instanced; + softpipe->pipe.draw_vbo = softpipe_draw_vbo; softpipe->pipe.draw_stream_output = softpipe_draw_stream_output; softpipe->pipe.clear = softpipe_clear; diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 53115a827d..c5f53cfa61 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -82,6 +82,7 @@ struct softpipe_context { struct pipe_sampler_view *geometry_sampler_views[PIPE_MAX_GEOMETRY_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_index_buffer index_buffer; struct { struct softpipe_resource *buffer[PIPE_MAX_SO_BUFFERS]; int offset[PIPE_MAX_SO_BUFFERS]; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 9e727c9381..2855f55a0e 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -35,6 +35,7 @@ #include "pipe/p_context.h" #include "util/u_inlines.h" #include "util/u_prim.h" +#include "util/u_draw_quad.h" #include "sp_context.h" #include "sp_query.h" @@ -111,27 +112,19 @@ softpipe_draw_stream_output(struct pipe_context *pipe, unsigned mode) * When the min/max element indexes 
aren't known, minIndex should be 0 * and maxIndex should be ~0. */ -static void -softpipe_draw_range_elements_instanced(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount) +void +softpipe_draw_vbo(struct pipe_context *pipe, + const struct pipe_draw_info *info) { struct softpipe_context *sp = softpipe_context(pipe); struct draw_context *draw = sp->draw; + void *mapped_indices = NULL; unsigned i; if (!softpipe_check_render_cond(sp)) return; - sp->reduced_api_prim = u_reduced_prim(mode); + sp->reduced_api_prim = u_reduced_prim(info->mode); if (sp->dirty) { softpipe_update_derived(sp); @@ -146,31 +139,27 @@ softpipe_draw_range_elements_instanced(struct pipe_context *pipe, } /* Map index buffer, if present */ - if (indexBuffer) { - void *mapped_indexes = softpipe_resource(indexBuffer)->data; - draw_set_mapped_element_buffer_range(draw, - indexSize, - indexBias, - minIndex, - maxIndex, - mapped_indexes); - } else { - /* no index/element buffer */ - draw_set_mapped_element_buffer_range(draw, - 0, 0, - start, - start + count - 1, - NULL); + if (info->indexed && sp->index_buffer.buffer) { + mapped_indices = softpipe_resource(sp->index_buffer.buffer)->data; + mapped_indices += sp->index_buffer.offset; } + draw_set_mapped_element_buffer_range(draw, (mapped_indices) ? + sp->index_buffer.index_size : 0, + info->index_bias, + info->min_index, + info->max_index, + mapped_indices); + /* draw! 
*/ - draw_arrays_instanced(draw, mode, start, count, startInstance, instanceCount); + draw_arrays_instanced(draw, info->mode, info->start, info->count, + info->start_instance, info->instance_count); /* unmap vertex/index buffers - will cause draw module to flush */ for (i = 0; i < sp->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); } - if (indexBuffer) { + if (mapped_indices) { draw_set_mapped_element_buffer(draw, 0, 0, NULL); } @@ -185,6 +174,49 @@ softpipe_draw_range_elements_instanced(struct pipe_context *pipe, sp->dirty_render_cache = TRUE; } +static void +softpipe_draw_range_elements_instanced(struct pipe_context *pipe, + struct pipe_resource *indexBuffer, + unsigned indexSize, + int indexBias, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + struct softpipe_context *sp = softpipe_context(pipe); + struct pipe_draw_info info; + struct pipe_index_buffer saved_ib, ib; + + util_draw_init_info(&info); + info.mode = mode; + info.start = start; + info.count = count; + info.start_instance = startInstance; + info.instance_count = instanceCount; + info.index_bias = indexBias; + info.min_index = minIndex; + info.max_index = maxIndex; + + if (indexBuffer) { + info.indexed = TRUE; + + saved_ib = sp->index_buffer; + ib.buffer = indexBuffer; + ib.offset = 0; + ib.index_size = indexSize; + pipe->set_index_buffer(pipe, &ib); + } + + softpipe_draw_vbo(pipe, &info); + + if (indexBuffer) + pipe->set_index_buffer(pipe, &saved_ib); +} + void softpipe_draw_range_elements(struct pipe_context *pipe, diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 7d6b86dce0..f04b0a5d31 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -221,6 +221,9 @@ void softpipe_set_vertex_buffers(struct pipe_context *, unsigned count, const struct pipe_vertex_buffer *); +void 
softpipe_set_index_buffer(struct pipe_context *, + const struct pipe_index_buffer *); + void softpipe_update_derived( struct softpipe_context *softpipe ); @@ -260,6 +263,10 @@ softpipe_draw_elements_instanced(struct pipe_context *pipe, unsigned startInstance, unsigned instanceCount); +void +softpipe_draw_vbo(struct pipe_context *pipe, + const struct pipe_draw_info *info); + void softpipe_draw_stream_output(struct pipe_context *pipe, unsigned mode); void diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c index 462f4d2655..880a7c7cd2 100644 --- a/src/gallium/drivers/softpipe/sp_state_vertex.c +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -88,3 +88,17 @@ softpipe_set_vertex_buffers(struct pipe_context *pipe, draw_set_vertex_buffers(softpipe->draw, count, buffers); } + +void +softpipe_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + if (ib) + memcpy(&softpipe->index_buffer, ib, sizeof(softpipe->index_buffer)); + else + memset(&softpipe->index_buffer, 0, sizeof(softpipe->index_buffer)); + + /* TODO make this more like a state */ +} diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index 9a46de643f..67a7614c8a 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -190,6 +190,7 @@ struct svga_state struct svga_vertex_shader *vs; struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS]; + struct pipe_index_buffer ib; struct pipe_resource *cb[PIPE_SHADER_TYPES]; struct pipe_framebuffer_state framebuffer; diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c index 58e930d983..fceaa83d70 100644 --- a/src/gallium/drivers/svga/svga_pipe_draw.c +++ b/src/gallium/drivers/svga/svga_pipe_draw.c @@ -248,10 +248,34 @@ svga_draw_arrays( struct pipe_context *pipe, start, count); } +static void 
+svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) +{ + struct svga_context *svga = svga_context(pipe); + + if (info->indexed && svga->curr.ib.buffer) { + unsigned offset; + + assert(svga->curr.ib.offset % svga->curr.ib.index_size == 0); + offset = svga->curr.ib.offset / svga->curr.ib.index_size; + + svga_draw_range_elements(pipe, svga->curr.ib.buffer, + svga->curr.ib.index_size, info->index_bias, + info->min_index, info->max_index, + info->mode, info->start + offset, info->count); + } + else { + svga_draw_range_elements(pipe, NULL, 0, 0, + info->min_index, info->max_index, + info->mode, info->start, info->count); + } +} + void svga_init_draw_functions( struct svga_context *svga ) { svga->pipe.draw_arrays = svga_draw_arrays; svga->pipe.draw_elements = svga_draw_elements; svga->pipe.draw_range_elements = svga_draw_range_elements; + svga->pipe.draw_vbo = svga_draw_vbo; } diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c index 23808ad08e..86c79459f3 100644 --- a/src/gallium/drivers/svga/svga_pipe_vertex.c +++ b/src/gallium/drivers/svga/svga_pipe_vertex.c @@ -66,6 +66,24 @@ static void svga_set_vertex_buffers(struct pipe_context *pipe, } +static void svga_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct svga_context *svga = svga_context(pipe); + + if (ib) { + pipe_resource_reference(&svga->curr.ib.buffer, ib->buffer); + memcpy(&svga->curr.ib, ib, sizeof(svga->curr.ib)); + } + else { + pipe_resource_reference(&svga->curr.ib.buffer, NULL); + memset(&svga->curr.ib, 0, sizeof(svga->curr.ib)); + } + + /* TODO make this more like a state */ +} + + static void * svga_create_vertex_elements_state(struct pipe_context *pipe, unsigned count, @@ -109,6 +127,7 @@ void svga_cleanup_vertex_state( struct svga_context *svga ) void svga_init_vertex_functions( struct svga_context *svga ) { svga->pipe.set_vertex_buffers = svga_set_vertex_buffers; + 
svga->pipe.set_index_buffer = svga_set_index_buffer; svga->pipe.create_vertex_elements_state = svga_create_vertex_elements_state; svga->pipe.bind_vertex_elements_state = svga_bind_vertex_elements_state; svga->pipe.delete_vertex_elements_state = svga_delete_vertex_elements_state; diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 55dd6cf883..91c9bf0999 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -167,6 +167,32 @@ trace_context_draw_range_elements(struct pipe_context *_pipe, } +static INLINE void +trace_context_draw_vbo(struct pipe_context *_pipe, + const struct pipe_draw_info *info) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "draw_vbo"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(bool, info->indexed); + trace_dump_arg(uint, info->mode); + trace_dump_arg(uint, info->start); + trace_dump_arg(uint, info->count); + trace_dump_arg(uint, info->start_instance); + trace_dump_arg(uint, info->instance_count); + trace_dump_arg(int, info->index_bias); + trace_dump_arg(uint, info->min_index); + trace_dump_arg(uint, info->max_index); + + pipe->draw_vbo(pipe, info); + + trace_dump_call_end(); +} + + static INLINE struct pipe_query * trace_context_create_query(struct pipe_context *_pipe, unsigned query_type) @@ -1044,6 +1070,30 @@ trace_context_set_vertex_buffers(struct pipe_context *_pipe, } +static INLINE void +trace_context_set_index_buffer(struct pipe_context *_pipe, + const struct pipe_index_buffer *_ib) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + struct pipe_index_buffer unwrapped_ib, *ib = NULL; + + if (_ib) { + unwrapped_ib = *_ib; + unwrapped_ib.buffer = trace_resource_unwrap(tr_ctx, _ib->buffer); + ib = &unwrapped_ib; + } + + trace_dump_call_begin("pipe_context", "set_index_buffer"); + + trace_dump_arg(ptr, 
pipe); + trace_dump_arg(index_buffer, ib); + + pipe->set_index_buffer(pipe, ib); + + trace_dump_call_end(); +} + static INLINE void trace_context_resource_copy_region(struct pipe_context *_pipe, struct pipe_resource *dst, @@ -1436,6 +1486,7 @@ trace_context_create(struct trace_screen *tr_scr, tr_ctx->base.draw_arrays = trace_context_draw_arrays; tr_ctx->base.draw_elements = trace_context_draw_elements; tr_ctx->base.draw_range_elements = trace_context_draw_range_elements; + tr_ctx->base.draw_vbo = trace_context_draw_vbo; tr_ctx->base.create_query = trace_context_create_query; tr_ctx->base.destroy_query = trace_context_destroy_query; tr_ctx->base.begin_query = trace_context_begin_query; @@ -1477,6 +1528,7 @@ trace_context_create(struct trace_screen *tr_scr, tr_ctx->base.create_sampler_view = trace_create_sampler_view; tr_ctx->base.sampler_view_destroy = trace_sampler_view_destroy; tr_ctx->base.set_vertex_buffers = trace_context_set_vertex_buffers; + tr_ctx->base.set_index_buffer = trace_context_set_index_buffer; tr_ctx->base.resource_copy_region = trace_context_resource_copy_region; tr_ctx->base.clear = trace_context_clear; tr_ctx->base.clear_render_target = trace_context_clear_render_target; diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index 1727c2a020..bd9a9bfaf1 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -533,6 +533,26 @@ void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state) } +void trace_dump_index_buffer(const struct pipe_index_buffer *state) +{ + if (!trace_dumping_enabled_locked()) + return; + + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_index_buffer"); + + trace_dump_member(uint, state, index_size); + trace_dump_member(uint, state, offset); + trace_dump_member(resource_ptr, state, buffer); + + trace_dump_struct_end(); +} + + void trace_dump_vertex_element(const struct pipe_vertex_element 
*state) { if (!trace_dumping_enabled_locked()) diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h index e614e8355e..2e70f4e1c7 100644 --- a/src/gallium/drivers/trace/tr_dump_state.h +++ b/src/gallium/drivers/trace/tr_dump_state.h @@ -75,6 +75,8 @@ void trace_dump_transfer(const struct pipe_transfer *state); void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state); +void trace_dump_index_buffer(const struct pipe_index_buffer *state); + void trace_dump_vertex_element(const struct pipe_vertex_element *state); -- cgit v1.2.3 From cd3ef7592cc9e2c83b175a8652c0153c578fb46b Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 16 Jul 2010 04:21:11 +0800 Subject: gallium: Use unified pipe_context::draw_vbo. Update u_draw_quad, st/vega, and st/mesa to use pipe_context::draw_vbo. --- src/gallium/auxiliary/util/u_draw_quad.c | 2 +- src/gallium/state_trackers/vega/polygon.c | 3 +- src/mesa/state_tracker/st_draw.c | 169 ++++++++++++------------------ 3 files changed, 71 insertions(+), 103 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c index b37b48b5ae..0b6dc5880f 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.c +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -60,7 +60,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe, /* note: vertex elements already set by caller */ /* draw */ - pipe->draw_arrays(pipe, prim_type, 0, num_verts); + util_draw_arrays(pipe, prim_type, 0, num_verts); } diff --git a/src/gallium/state_trackers/vega/polygon.c b/src/gallium/state_trackers/vega/polygon.c index e9c8f03137..bc94170eb9 100644 --- a/src/gallium/state_trackers/vega/polygon.c +++ b/src/gallium/state_trackers/vega/polygon.c @@ -301,8 +301,7 @@ static void draw_polygon(struct vg_context *ctx, cso_set_vertex_elements(ctx->cso_context, 1, &velement); /* draw */ - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_FAN, - 0, poly->num_verts); + 
util_draw_arrays(pipe, PIPE_PRIM_TRIANGLE_FAN, 0, (uint) poly->num_verts); } void polygon_fill(struct polygon *poly, struct vg_context *ctx) diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 5821da4889..5b05489270 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -58,6 +58,7 @@ #include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_prim.h" +#include "util/u_draw_quad.h" #include "draw/draw_context.h" #include "cso_cache/cso_context.h" @@ -494,6 +495,49 @@ setup_non_interleaved_attribs(GLcontext *ctx, } +static void +setup_index_buffer(GLcontext *ctx, + const struct _mesa_index_buffer *ib, + struct pipe_index_buffer *ibuffer) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + + memset(ibuffer, 0, sizeof(*ibuffer)); + if (ib) { + struct gl_buffer_object *bufobj = ib->obj; + + switch (ib->type) { + case GL_UNSIGNED_INT: + ibuffer->index_size = 4; + break; + case GL_UNSIGNED_SHORT: + ibuffer->index_size = 2; + break; + case GL_UNSIGNED_BYTE: + ibuffer->index_size = 1; + break; + default: + assert(0); + return; + } + + /* get/create the index buffer object */ + if (bufobj && bufobj->Name) { + /* elements/indexes are in a real VBO */ + struct st_buffer_object *stobj = st_buffer_object(bufobj); + pipe_resource_reference(&ibuffer->buffer, stobj->buffer); + ibuffer->offset = pointer_to_offset(ib->ptr); + } + else { + /* element/indicies are in user space memory */ + ibuffer->buffer = + pipe_user_buffer_create(pipe->screen, (void *) ib->ptr, + ib->count * ibuffer->index_size, + PIPE_BIND_INDEX_BUFFER); + } + } +} /** * Prior to drawing, check that any uniforms referenced by the @@ -568,8 +612,11 @@ st_draw_vbo(GLcontext *ctx, GLuint attr; struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; unsigned num_vbuffers, num_velements; + struct pipe_index_buffer ibuffer; GLboolean userSpace = GL_FALSE; GLboolean vertDataEdgeFlags; + struct 
pipe_draw_info info; + unsigned i; /* Mesa core state should have been validated already */ assert(ctx->NewState == 0x0); @@ -647,113 +694,35 @@ st_draw_vbo(GLcontext *ctx, if (num_vbuffers == 0 || num_velements == 0) return; - /* do actual drawing */ - if (ib) { - /* indexed primitive */ - struct gl_buffer_object *bufobj = ib->obj; - struct pipe_resource *indexBuf = NULL; - unsigned indexSize, indexOffset, i; + setup_index_buffer(ctx, ib, &ibuffer); + pipe->set_index_buffer(pipe, &ibuffer); - switch (ib->type) { - case GL_UNSIGNED_INT: - indexSize = 4; - break; - case GL_UNSIGNED_SHORT: - indexSize = 2; - break; - case GL_UNSIGNED_BYTE: - indexSize = 1; - break; - default: - assert(0); - return; - } - - /* get/create the index buffer object */ - if (bufobj && bufobj->Name) { - /* elements/indexes are in a real VBO */ - struct st_buffer_object *stobj = st_buffer_object(bufobj); - pipe_resource_reference(&indexBuf, stobj->buffer); - indexOffset = pointer_to_offset(ib->ptr) / indexSize; - } - else { - /* element/indicies are in user space memory */ - indexBuf = pipe_user_buffer_create(pipe->screen, (void *) ib->ptr, - ib->count * indexSize, - PIPE_BIND_INDEX_BUFFER); - indexOffset = 0; + util_draw_init_info(&info); + if (ib) { + info.indexed = TRUE; + if (min_index != ~0 && max_index != ~0) { + info.min_index = min_index; + info.max_index = max_index; } + } - /* draw */ - if (pipe->draw_range_elements && min_index != ~0 && max_index != ~0) { - /* XXX: exercise temporary path to pass min/max directly - * through to driver & draw module. These interfaces still - * need a bit of work... 
- */ - for (i = 0; i < nr_prims; i++) { - unsigned vcount = prims[i].count; - unsigned prim = translate_prim(ctx, prims[i].mode); - - if (u_trim_pipe_prim(prims[i].mode, &vcount)) { - pipe->draw_range_elements(pipe, indexBuf, indexSize, - prims[i].basevertex, - min_index, max_index, prim, - prims[i].start + indexOffset, vcount); - } - } - } - else { - for (i = 0; i < nr_prims; i++) { - unsigned vcount = prims[i].count; - unsigned prim = translate_prim(ctx, prims[i].mode); - - if (u_trim_pipe_prim(prims[i].mode, &vcount)) { - if (prims[i].num_instances == 1) { - pipe->draw_elements(pipe, indexBuf, - indexSize, - prims[i].basevertex, - prim, - prims[i].start + indexOffset, - vcount); - } - else { - pipe->draw_elements_instanced(pipe, indexBuf, - indexSize, - prims[i].basevertex, - prim, - prims[i].start + indexOffset, - vcount, - 0, /* startInstance */ - prims[i].num_instances); - } - } - } + /* do actual drawing */ + for (i = 0; i < nr_prims; i++) { + info.mode = translate_prim( ctx, prims[i].mode ); + info.start = prims[i].start; + info.count = prims[i].count; + info.instance_count = prims[i].num_instances; + info.index_bias = prims[i].basevertex; + if (!ib) { + info.min_index = info.start; + info.max_index = info.start + info.count - 1; } - pipe_resource_reference(&indexBuf, NULL); + if (u_trim_pipe_prim(info.mode, &info.count)) + pipe->draw_vbo(pipe, &info); } - else { - /* non-indexed */ - GLuint i; - - for (i = 0; i < nr_prims; i++) { - unsigned vcount = prims[i].count; - unsigned prim = translate_prim(ctx, prims[i].mode); - if (u_trim_pipe_prim(prims[i].mode, &vcount)) { - if (prims[i].num_instances == 1) { - pipe->draw_arrays(pipe, prim, prims[i].start, vcount); - } - else { - pipe->draw_arrays_instanced(pipe, prim, - prims[i].start, - vcount, - 0, /* startInstance */ - prims[i].num_instances); - } - } - } - } + pipe_resource_reference(&ibuffer.buffer, NULL); /* unreference buffers (frees wrapped user-space buffer objects) */ for (attr = 0; attr < 
num_vbuffers; attr++) { -- cgit v1.2.3 From a57f84251926045a3358822d0fd92ca95a4f0fde Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Sat, 17 Jul 2010 01:10:46 +0800 Subject: gallium: Keep only pipe_context::draw_vbo. That is, remove pipe_context::draw_arrays, pipe_context::draw_elements, pipe_context::draw_arrays_instanced, pipe_context::draw_elements_instanced, pipe_context::draw_range_elements. --- src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 63 --------- src/gallium/drivers/failover/fo_context.c | 44 ------- src/gallium/drivers/galahad/glhd_context.c | 68 ---------- src/gallium/drivers/i915/i915_context.c | 62 --------- src/gallium/drivers/i965/brw_draw.c | 64 --------- src/gallium/drivers/identity/id_context.c | 68 ---------- src/gallium/drivers/llvmpipe/lp_draw_arrays.c | 151 ---------------------- src/gallium/drivers/nv50/nv50_context.c | 4 - src/gallium/drivers/nv50/nv50_context.h | 18 --- src/gallium/drivers/nv50/nv50_vbo.c | 21 +-- src/gallium/drivers/nvfx/nvfx_context.c | 2 - src/gallium/drivers/nvfx/nvfx_context.h | 7 - src/gallium/drivers/nvfx/nvfx_vbo.c | 4 +- src/gallium/drivers/r300/r300_render.c | 69 ---------- src/gallium/drivers/r300/r300_render_stencilref.c | 55 +------- src/gallium/drivers/r600/r600_context.c | 3 - src/gallium/drivers/r600/r600_context.h | 11 -- src/gallium/drivers/r600/r600_draw.c | 50 ------- src/gallium/drivers/rbug/rbug_context.c | 86 ------------ src/gallium/drivers/softpipe/sp_context.c | 5 - src/gallium/drivers/softpipe/sp_draw_arrays.c | 150 --------------------- src/gallium/drivers/softpipe/sp_state.h | 35 ----- src/gallium/drivers/svga/svga_pipe_draw.c | 25 ---- src/gallium/drivers/trace/tr_context.c | 88 ------------- src/gallium/include/pipe/p_context.h | 41 ------ 25 files changed, 6 insertions(+), 1188 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index e06226fbfe..4adef5b8c0 100644 --- 
a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -34,7 +34,6 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" #include "util/u_inlines.h" -#include "util/u_draw_quad.h" #include "cell_context.h" #include "cell_draw_arrays.h" @@ -113,71 +112,9 @@ cell_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) } -static void -cell_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned min_index, - unsigned max_index, - unsigned mode, unsigned start, unsigned count) -{ - struct cell_context *cell = cell_context(pipe); - struct pipe_draw_info info; - struct pipe_index_buffer saved_ib, ib; - - util_draw_init_info(&info); - info.mode = mode; - info.start = start; - info.count = count; - info.index_bias = indexBias; - info.min_index = min_index; - info.max_index = max_index; - - if (indexBuffer) { - info.indexed = TRUE; - saved_ib = cell->index_buffer; - - ib.buffer = indexBuffer; - ib.offset = 0; - ib.index_size = indexSize; - pipe->set_index_buffer(pipe, &ib); - } - - cell_draw_vbo(pipe, &info); - - if (indexBuffer) - pipe->set_index_buffer(pipe, &saved_ib); -} - - -static void -cell_draw_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned mode, unsigned start, unsigned count) -{ - cell_draw_range_elements( pipe, indexBuffer, - indexSize, indexBias, - 0, 0xffffffff, - mode, start, count ); -} - - -static void -cell_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count) -{ - cell_draw_elements(pipe, NULL, 0, 0, mode, start, count); -} - - void cell_init_draw_functions(struct cell_context *cell) { - cell->pipe.draw_arrays = cell_draw_arrays; - cell->pipe.draw_elements = cell_draw_elements; - cell->pipe.draw_range_elements = cell_draw_range_elements; cell->pipe.draw_vbo = cell_draw_vbo; } diff --git 
a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index 1048d58313..761a0fce72 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -28,7 +28,6 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" -#include "util/u_draw_quad.h" #include "pipe/p_context.h" #include "fo_context.h" @@ -88,47 +87,6 @@ static void failover_draw_vbo( struct pipe_context *pipe, } } - -static void failover_draw_elements( struct pipe_context *pipe, - struct pipe_resource *indexResource, - unsigned indexSize, - int indexBias, - unsigned prim, - unsigned start, - unsigned count) -{ - struct failover_context *failover = failover_context( pipe ); - struct pipe_draw_info info; - struct pipe_index_buffer saved_ib, ib; - - util_draw_init_info(&info); - info.mode = prim; - info.start = start; - info.count = count; - - if (indexResource) { - info.indexed = TRUE; - saved_ib = failover->index_buffer; - - ib.buffer = indexResource; - ib.offset = 0; - ib.index_size = indexSize; - pipe->set_index_buffer(pipe, &ib); - } - - failover_draw_vbo(pipe, &info); - - if (indexResource) - pipe->set_index_buffer(pipe, &saved_ib); -} - - -static void failover_draw_arrays( struct pipe_context *pipe, - unsigned prim, unsigned start, unsigned count) -{ - failover_draw_elements(pipe, NULL, 0, 0, prim, start, count); -} - static unsigned int failover_is_resource_referenced( struct pipe_context *_pipe, struct pipe_resource *resource, @@ -161,8 +119,6 @@ struct pipe_context *failover_create( struct pipe_context *hw, failover->pipe.get_paramf = hw->get_paramf; #endif - failover->pipe.draw_arrays = failover_draw_arrays; - failover->pipe.draw_elements = failover_draw_elements; failover->pipe.draw_vbo = failover_draw_vbo; failover->pipe.clear = hw->clear; failover->pipe.clear_render_target = hw->clear_render_target; diff --git a/src/gallium/drivers/galahad/glhd_context.c b/src/gallium/drivers/galahad/glhd_context.c index 
6473f2d499..fe14a287ef 100644 --- a/src/gallium/drivers/galahad/glhd_context.c +++ b/src/gallium/drivers/galahad/glhd_context.c @@ -47,71 +47,6 @@ galahad_destroy(struct pipe_context *_pipe) FREE(glhd_pipe); } -static void -galahad_draw_arrays(struct pipe_context *_pipe, - unsigned prim, - unsigned start, - unsigned count) -{ - struct galahad_context *glhd_pipe = galahad_context(_pipe); - struct pipe_context *pipe = glhd_pipe->pipe; - - pipe->draw_arrays(pipe, - prim, - start, - count); -} - -static void -galahad_draw_elements(struct pipe_context *_pipe, - struct pipe_resource *_indexResource, - unsigned indexSize, - int indexBias, - unsigned prim, - unsigned start, - unsigned count) -{ - struct galahad_context *glhd_pipe = galahad_context(_pipe); - struct galahad_resource *glhd_resource = galahad_resource(_indexResource); - struct pipe_context *pipe = glhd_pipe->pipe; - struct pipe_resource *indexResource = glhd_resource->resource; - - pipe->draw_elements(pipe, - indexResource, - indexSize, - indexBias, - prim, - start, - count); -} - -static void -galahad_draw_range_elements(struct pipe_context *_pipe, - struct pipe_resource *_indexResource, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) -{ - struct galahad_context *glhd_pipe = galahad_context(_pipe); - struct galahad_resource *glhd_resource = galahad_resource(_indexResource); - struct pipe_context *pipe = glhd_pipe->pipe; - struct pipe_resource *indexResource = glhd_resource->resource; - - pipe->draw_range_elements(pipe, - indexResource, - indexSize, - indexBias, - minIndex, - maxIndex, - mode, - start, - count); -} - static void galahad_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info) @@ -979,9 +914,6 @@ galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) glhd_pipe->base.draw = NULL; glhd_pipe->base.destroy = galahad_destroy; - glhd_pipe->base.draw_arrays = 
galahad_draw_arrays; - glhd_pipe->base.draw_elements = galahad_draw_elements; - glhd_pipe->base.draw_range_elements = galahad_draw_range_elements; glhd_pipe->base.draw_vbo = galahad_draw_vbo; glhd_pipe->base.create_query = galahad_create_query; glhd_pipe->base.destroy_query = galahad_destroy_query; diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index ca07b3e235..496efc99da 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -36,7 +36,6 @@ #include "pipe/p_defines.h" #include "util/u_inlines.h" #include "util/u_memory.h" -#include "util/u_draw_quad.h" #include "pipe/p_screen.h" @@ -102,64 +101,6 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) } } -static void -i915_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned min_index, - unsigned max_index, - unsigned prim, unsigned start, unsigned count) -{ - struct i915_context *i915 = i915_context(pipe); - struct pipe_draw_info info; - struct pipe_index_buffer saved_ib, ib; - - util_draw_init_info(&info); - info.mode = prim; - info.start = start; - info.count = count; - info.index_bias = indexBias; - info.min_index = min_index; - info.max_index = max_index; - - if (indexBuffer) { - info.indexed = TRUE; - saved_ib = i915->index_buffer; - - ib.buffer = indexBuffer; - ib.offset = 0; - ib.index_size = indexSize; - pipe->set_index_buffer(pipe, &ib); - } - - i915_draw_vbo(pipe, &info); - - if (indexBuffer) - pipe->set_index_buffer(pipe, &saved_ib); -} - -static void -i915_draw_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned prim, unsigned start, unsigned count) -{ - i915_draw_range_elements(pipe, indexBuffer, - indexSize, indexBias, - 0, 0xffffffff, - prim, start, count); -} - -static void -i915_draw_arrays(struct pipe_context *pipe, - unsigned 
prim, unsigned start, unsigned count) -{ - i915_draw_elements(pipe, NULL, 0, 0, prim, start, count); -} - - - /* * Generic context functions @@ -203,9 +144,6 @@ i915_create_context(struct pipe_screen *screen, void *priv) i915->base.clear = i915_clear; - i915->base.draw_arrays = i915_draw_arrays; - i915->base.draw_elements = i915_draw_elements; - i915->base.draw_range_elements = i915_draw_range_elements; i915->base.draw_vbo = i915_draw_vbo; /* diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index fa7d047e0b..3ab9024c31 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -29,7 +29,6 @@ #include "util/u_inlines.h" #include "util/u_prim.h" #include "util/u_upload_mgr.h" -#include "util/u_draw_quad.h" #include "brw_draw.h" #include "brw_defines.h" @@ -220,74 +219,11 @@ brw_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) } } -static void -brw_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *index_buffer, - unsigned index_size, int index_bias, - unsigned min_index, - unsigned max_index, - unsigned mode, unsigned start, unsigned count) -{ - struct brw_context *brw = brw_context(pipe); - struct pipe_draw_info info; - struct pipe_index_buffer saved_ib, ib; - - util_draw_init_info(&info); - info.mode = mode; - info.start = start; - info.count = count; - info.index_bias = index_bias; - info.min_index = min_index; - info.max_index = max_index; - - if (index_buffer) { - info.indexed = TRUE; - saved_ib.buffer = brw->curr.index_buffer; - saved_ib.offset = brw->curr.index_offset; - saved_ib.index_size = brw->curr.index_size; - - ib.buffer = index_buffer; - ib.offset = 0; - ib.index_size = index_size; - pipe->set_index_buffer(pipe, &ib); - } - - brw_draw_vbo(pipe, &info); - - if (index_buffer) - pipe->set_index_buffer(pipe, &saved_ib); -} - -static void -brw_draw_elements(struct pipe_context *pipe, - struct pipe_resource *index_buffer, - unsigned index_size, 
int index_bias, - unsigned mode, - unsigned start, unsigned count) -{ - brw_draw_range_elements( pipe, index_buffer, - index_size, index_bias, - 0, 0xffffffff, - mode, - start, count ); -} - -static void -brw_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count) -{ - brw_draw_elements(pipe, NULL, 0, 0, mode, start, count); -} - - boolean brw_draw_init( struct brw_context *brw ) { /* Register our drawing function: */ - brw->base.draw_arrays = brw_draw_arrays; - brw->base.draw_elements = brw_draw_elements; - brw->base.draw_range_elements = brw_draw_range_elements; brw->base.draw_vbo = brw_draw_vbo; /* Create helpers for uploading data in user buffers: diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index e10d3a1413..de83c24905 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -45,71 +45,6 @@ identity_destroy(struct pipe_context *_pipe) FREE(id_pipe); } -static void -identity_draw_arrays(struct pipe_context *_pipe, - unsigned prim, - unsigned start, - unsigned count) -{ - struct identity_context *id_pipe = identity_context(_pipe); - struct pipe_context *pipe = id_pipe->pipe; - - pipe->draw_arrays(pipe, - prim, - start, - count); -} - -static void -identity_draw_elements(struct pipe_context *_pipe, - struct pipe_resource *_indexResource, - unsigned indexSize, - int indexBias, - unsigned prim, - unsigned start, - unsigned count) -{ - struct identity_context *id_pipe = identity_context(_pipe); - struct identity_resource *id_resource = identity_resource(_indexResource); - struct pipe_context *pipe = id_pipe->pipe; - struct pipe_resource *indexResource = id_resource->resource; - - pipe->draw_elements(pipe, - indexResource, - indexSize, - indexBias, - prim, - start, - count); -} - -static void -identity_draw_range_elements(struct pipe_context *_pipe, - struct pipe_resource *_indexResource, - unsigned indexSize, - int indexBias, - 
unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) -{ - struct identity_context *id_pipe = identity_context(_pipe); - struct identity_resource *id_resource = identity_resource(_indexResource); - struct pipe_context *pipe = id_pipe->pipe; - struct pipe_resource *indexResource = id_resource->resource; - - pipe->draw_range_elements(pipe, - indexResource, - indexSize, - indexBias, - minIndex, - maxIndex, - mode, - start, - count); -} - static void identity_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info) @@ -917,9 +852,6 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.draw = NULL; id_pipe->base.destroy = identity_destroy; - id_pipe->base.draw_arrays = identity_draw_arrays; - id_pipe->base.draw_elements = identity_draw_elements; - id_pipe->base.draw_range_elements = identity_draw_range_elements; id_pipe->base.draw_vbo = identity_draw_vbo; id_pipe->base.create_query = identity_create_query; id_pipe->base.destroy_query = identity_destroy_query; diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index b6dbb9d288..22c2836e22 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -34,7 +34,6 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" #include "util/u_prim.h" -#include "util/u_draw_quad.h" #include "lp_context.h" #include "lp_state.h" @@ -109,158 +108,8 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) } -static void -llvmpipe_draw_range_elements_instanced(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount) -{ - struct llvmpipe_context *lp = llvmpipe_context(pipe); - struct pipe_draw_info info; - struct 
pipe_index_buffer saved_ib, ib; - - util_draw_init_info(&info); - info.mode = mode; - info.start = start; - info.count = count; - info.start_instance = startInstance; - info.instance_count = instanceCount; - - info.index_bias = indexBias; - info.min_index = minIndex; - info.max_index = maxIndex; - - if (indexBuffer) { - info.indexed = TRUE; - saved_ib = lp->index_buffer; - - ib.buffer = indexBuffer; - ib.offset = 0; - ib.index_size = indexSize; - pipe->set_index_buffer(pipe, &ib); - } - - llvmpipe_draw_vbo(pipe, &info); - - if (indexBuffer) - pipe->set_index_buffer(pipe, &saved_ib); -} - -static void -llvmpipe_draw_arrays_instanced(struct pipe_context *pipe, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount) -{ - llvmpipe_draw_range_elements_instanced(pipe, - NULL, /* no indexBuffer */ - 0, 0, /* indexSize, indexBias */ - 0, ~0, /* minIndex, maxIndex */ - mode, - start, - count, - startInstance, - instanceCount); -} - - -static void -llvmpipe_draw_elements_instanced(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount) -{ - llvmpipe_draw_range_elements_instanced(pipe, - indexBuffer, - indexSize, indexBias, - 0, ~0, /* minIndex, maxIndex */ - mode, - start, - count, - startInstance, - instanceCount); -} - - -static void -llvmpipe_draw_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned mode, - unsigned start, - unsigned count) -{ - llvmpipe_draw_range_elements_instanced(pipe, - indexBuffer, - indexSize, indexBias, - 0, 0xffffffff, /* min, maxIndex */ - mode, start, count, - 0, /* startInstance */ - 1); /* instanceCount */ -} - - -static void -llvmpipe_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned 
min_index, - unsigned max_index, - unsigned mode, - unsigned start, - unsigned count) -{ - llvmpipe_draw_range_elements_instanced(pipe, - indexBuffer, - indexSize, indexBias, - min_index, max_index, - mode, start, count, - 0, /* startInstance */ - 1); /* instanceCount */ -} - - -static void -llvmpipe_draw_arrays(struct pipe_context *pipe, - unsigned mode, - unsigned start, - unsigned count) -{ - llvmpipe_draw_range_elements_instanced(pipe, - NULL, /* indexBuffer */ - 0, /* indexSize */ - 0, /* indexBias */ - 0, ~0, /* min, maxIndex */ - mode, start, count, - 0, /* startInstance */ - 1); /* instanceCount */ -} - - void llvmpipe_init_draw_funcs(struct llvmpipe_context *llvmpipe) { - llvmpipe->pipe.draw_arrays = llvmpipe_draw_arrays; - llvmpipe->pipe.draw_elements = llvmpipe_draw_elements; - llvmpipe->pipe.draw_range_elements = llvmpipe_draw_range_elements; - llvmpipe->pipe.draw_arrays_instanced = llvmpipe_draw_arrays_instanced; - llvmpipe->pipe.draw_elements_instanced = llvmpipe_draw_elements_instanced; - llvmpipe->pipe.draw_vbo = llvmpipe_draw_vbo; } diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 3fc39c1137..0874cb5e4e 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -82,10 +82,6 @@ nv50_create(struct pipe_screen *pscreen, void *priv) nv50->pipe.destroy = nv50_destroy; - nv50->pipe.draw_arrays = nv50_draw_arrays; - nv50->pipe.draw_arrays_instanced = nv50_draw_arrays_instanced; - nv50->pipe.draw_elements = nv50_draw_elements; - nv50->pipe.draw_elements_instanced = nv50_draw_elements_instanced; nv50->pipe.draw_vbo = nv50_draw_vbo; nv50->pipe.clear = nv50_clear; diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index a7c2b5d487..d24d6c50ea 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -180,24 +180,6 @@ nv50_surface_do_copy(struct nv50_screen *screen, struct 
pipe_surface *dst, extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50); /* nv50_vbo.c */ -extern void nv50_draw_arrays(struct pipe_context *, unsigned mode, - unsigned start, unsigned count); -extern void nv50_draw_arrays_instanced(struct pipe_context *, unsigned mode, - unsigned start, unsigned count, - unsigned startInstance, - unsigned instanceCount); -extern void nv50_draw_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned mode, unsigned start, - unsigned count); -extern void nv50_draw_elements_instanced(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned mode, unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount); extern void nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info); extern void nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso); diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 11ffc182c2..e7f8fe33ed 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -173,7 +173,7 @@ instance_step(struct nv50_context *nv50, struct instance *a) } } -void +static void nv50_draw_arrays_instanced(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count, unsigned startInstance, unsigned instanceCount) @@ -220,13 +220,6 @@ nv50_draw_arrays_instanced(struct pipe_context *pipe, } } -void -nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, - unsigned count) -{ - nv50_draw_arrays_instanced(pipe, mode, start, count, 0, 1); -} - struct inline_ctx { struct nv50_context *nv50; void *map; @@ -384,7 +377,7 @@ nv50_draw_elements_inline(struct pipe_context *pipe, pipe_buffer_unmap(pipe, indexBuffer, transfer); } -void +static void nv50_draw_elements_instanced(struct pipe_context *pipe, struct pipe_resource *indexBuffer, unsigned indexSize, int 
indexBias, @@ -463,16 +456,6 @@ nv50_draw_elements_instanced(struct pipe_context *pipe, } } -void -nv50_draw_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned mode, unsigned start, unsigned count) -{ - nv50_draw_elements_instanced(pipe, indexBuffer, indexSize, indexBias, - mode, start, count, 0, 1); -} - void nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c index f30795f69a..7218abff22 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.c +++ b/src/gallium/drivers/nvfx/nvfx_context.c @@ -55,8 +55,6 @@ nvfx_create(struct pipe_screen *pscreen, void *priv) nvfx->pipe.screen = pscreen; nvfx->pipe.priv = priv; nvfx->pipe.destroy = nvfx_destroy; - nvfx->pipe.draw_arrays = nvfx_draw_arrays; - nvfx->pipe.draw_elements = nvfx_draw_elements; nvfx->pipe.draw_vbo = nvfx_draw_vbo; nvfx->pipe.clear = nvfx_clear; nvfx->pipe.flush = nvfx_flush; diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index d6cd272eed..89f94c10bd 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -236,13 +236,6 @@ extern void nvfx_init_transfer_functions(struct nvfx_context *nvfx); /* nvfx_vbo.c */ extern boolean nvfx_vbo_validate(struct nvfx_context *nvfx); extern void nvfx_vbo_relocate(struct nvfx_context *nvfx); -extern void nvfx_draw_arrays(struct pipe_context *, unsigned mode, - unsigned start, unsigned count); -extern void nvfx_draw_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned mode, unsigned start, - unsigned count); extern void nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info); diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c index 23a59b589b..4aa3793842 100644 --- 
a/src/gallium/drivers/nvfx/nvfx_vbo.c +++ b/src/gallium/drivers/nvfx/nvfx_vbo.c @@ -158,7 +158,7 @@ nvfx_vbo_static_attrib(struct nvfx_context *nvfx, pipe_buffer_unmap(&nvfx->pipe, vb->buffer, transfer); } -void +static void nvfx_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { @@ -463,7 +463,7 @@ nvfx_draw_elements_vbo(struct pipe_context *pipe, } } -void +static void nvfx_draw_elements(struct pipe_context *pipe, struct pipe_resource *indexBuffer, unsigned indexSize, int indexBias, diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index da96098cc4..c179b07e86 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -33,7 +33,6 @@ #include "util/u_memory.h" #include "util/u_upload_mgr.h" #include "util/u_prim.h" -#include "util/u_draw_quad.h" #include "r300_cs.h" #include "r300_context.h" @@ -567,19 +566,6 @@ static void r300_draw_range_elements(struct pipe_context* pipe, } } -/* Simple helpers for context setup. Should probably be moved to util. */ -static void r300_draw_elements(struct pipe_context* pipe, - struct pipe_resource* indexBuffer, - unsigned indexSize, int indexBias, unsigned mode, - unsigned start, unsigned count) -{ - struct r300_context *r300 = r300_context(pipe); - - pipe->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, - 0, r300->vertex_buffer_max_index, - mode, start, count); -} - static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, unsigned start, unsigned count) { @@ -735,54 +721,6 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, } } -/* SW TCL elements, using Draw. 
*/ -static void r300_swtcl_draw_range_elements(struct pipe_context* pipe, - struct pipe_resource* indexBuffer, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) -{ - struct r300_context* r300 = r300_context(pipe); - struct pipe_draw_info info; - struct pipe_index_buffer saved_ib, ib; - - util_draw_init_info(&info); - info.mode = mode; - info.start = start; - info.count = count; - info.index_bias = indexBias; - info.min_index = minIndex; - info.max_index = maxIndex; - - if (indexBuffer) { - info.indexed = TRUE; - - saved_ib = r300->index_buffer; - ib.buffer = indexBuffer; - ib.offset = 0; - ib.index_size = indexSize; - pipe->set_index_buffer(pipe, &ib); - } - - r300_swtcl_draw_vbo(pipe, &info); - - if (indexBuffer) - pipe->set_index_buffer(pipe, &saved_ib); -} - -static void r300_swtcl_draw_arrays(struct pipe_context* pipe, - unsigned mode, - unsigned start, - unsigned count) -{ - r300_swtcl_draw_range_elements(pipe, NULL, 0, 0, - start, start + count -1, mode, start, count); -} - /* Object for rendering using Draw. */ struct r300_render { /* Parent class */ @@ -1179,17 +1117,10 @@ static void r300_resource_resolve(struct pipe_context* pipe, void r300_init_render_functions(struct r300_context *r300) { - /* Set generic functions. */ - r300->context.draw_elements = r300_draw_elements; - /* Set draw functions based on presence of HW TCL. 
*/ if (r300->screen->caps.has_tcl) { - r300->context.draw_arrays = r300_draw_arrays; - r300->context.draw_range_elements = r300_draw_range_elements; r300->context.draw_vbo = r300_draw_vbo; } else { - r300->context.draw_arrays = r300_swtcl_draw_arrays; - r300->context.draw_range_elements = r300_swtcl_draw_range_elements; r300->context.draw_vbo = r300_swtcl_draw_vbo; } diff --git a/src/gallium/drivers/r300/r300_render_stencilref.c b/src/gallium/drivers/r300/r300_render_stencilref.c index 6d801cf159..1f035d64a2 100644 --- a/src/gallium/drivers/r300/r300_render_stencilref.c +++ b/src/gallium/drivers/r300/r300_render_stencilref.c @@ -34,14 +34,6 @@ #include "r300_reg.h" struct r300_stencilref_context { - void (*draw_arrays)(struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count); - - void (*draw_range_elements)( - struct pipe_context *pipe, struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex, - unsigned mode, unsigned start, unsigned count); - void (*draw_vbo)(struct pipe_context *pipe, const struct pipe_draw_info *info); @@ -108,45 +100,6 @@ static void r300_stencilref_end(struct r300_context *r300) r300->dsa_state.dirty = TRUE; } -static void r300_stencilref_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count) -{ - struct r300_context *r300 = r300_context(pipe); - struct r300_stencilref_context *sr = r300->stencilref_fallback; - - if (!r300_stencilref_needed(r300)) { - sr->draw_arrays(pipe, mode, start, count); - } else { - r300_stencilref_begin(r300); - sr->draw_arrays(pipe, mode, start, count); - r300_stencilref_switch_side(r300); - sr->draw_arrays(pipe, mode, start, count); - r300_stencilref_end(r300); - } -} - -static void r300_stencilref_draw_range_elements( - struct pipe_context *pipe, struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex, - unsigned mode, unsigned start, unsigned count) -{ - 
struct r300_context *r300 = r300_context(pipe); - struct r300_stencilref_context *sr = r300->stencilref_fallback; - - if (!r300_stencilref_needed(r300)) { - sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, - minIndex, maxIndex, mode, start, count); - } else { - r300_stencilref_begin(r300); - sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, - minIndex, maxIndex, mode, start, count); - r300_stencilref_switch_side(r300); - sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, - minIndex, maxIndex, mode, start, count); - r300_stencilref_end(r300); - } -} - static void r300_stencilref_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { @@ -168,13 +121,9 @@ void r300_plug_in_stencil_ref_fallback(struct r300_context *r300) { r300->stencilref_fallback = CALLOC_STRUCT(r300_stencilref_context); - /* Save original draw functions. */ - r300->stencilref_fallback->draw_arrays = r300->context.draw_arrays; - r300->stencilref_fallback->draw_range_elements = r300->context.draw_range_elements; + /* Save original draw function. */ r300->stencilref_fallback->draw_vbo = r300->context.draw_vbo; - /* Override the draw functions. */ - r300->context.draw_arrays = r300_stencilref_draw_arrays; - r300->context.draw_range_elements = r300_stencilref_draw_range_elements; + /* Override the draw function. 
*/ r300->context.draw_vbo = r300_stencilref_draw_vbo; } diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index 2c2bd4672b..fc8aa1b866 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -313,9 +313,6 @@ struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) rctx->context.screen = screen; rctx->context.priv = priv; rctx->context.destroy = r600_destroy_context; - rctx->context.draw_arrays = r600_draw_arrays; - rctx->context.draw_elements = r600_draw_elements; - rctx->context.draw_range_elements = r600_draw_range_elements; rctx->context.draw_vbo = r600_draw_vbo; rctx->context.flush = r600_flush; diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index 9427c19d05..97c8a46bb0 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -191,17 +191,6 @@ struct r600_context_state *r600_context_state_decref(struct r600_context_state * int r600_context_hw_states(struct r600_context *rctx); -void r600_draw_arrays(struct pipe_context *ctx, unsigned mode, - unsigned start, unsigned count); -void r600_draw_elements(struct pipe_context *ctx, - struct pipe_resource *index_buffer, - unsigned index_size, int index_bias, unsigned mode, - unsigned start, unsigned count); -void r600_draw_range_elements(struct pipe_context *ctx, - struct pipe_resource *index_buffer, - unsigned index_size, int index_bias, unsigned min_index, - unsigned max_index, unsigned mode, - unsigned start, unsigned count); void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info); diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c index eeaa677edb..c52dfd3fb3 100644 --- a/src/gallium/drivers/r600/r600_draw.c +++ b/src/gallium/drivers/r600/r600_draw.c @@ -176,56 +176,6 @@ static int r600_draw_common(struct r600_draw *draw) return 0; } -void 
r600_draw_range_elements(struct pipe_context *ctx, - struct pipe_resource *index_buffer, - unsigned index_size, int index_bias, unsigned min_index, - unsigned max_index, unsigned mode, - unsigned start, unsigned count) -{ - struct r600_draw draw; - assert(index_bias == 0); - - draw.ctx = ctx; - draw.mode = mode; - draw.start = start; - draw.count = count; - draw.index_size = index_size; - draw.index_buffer = index_buffer; -printf("index_size %d min %d max %d start %d count %d\n", index_size, min_index, max_index, start, count); - r600_draw_common(&draw); -} - -void r600_draw_elements(struct pipe_context *ctx, - struct pipe_resource *index_buffer, - unsigned index_size, int index_bias, unsigned mode, - unsigned start, unsigned count) -{ - struct r600_draw draw; - assert(index_bias == 0); - - draw.ctx = ctx; - draw.mode = mode; - draw.start = start; - draw.count = count; - draw.index_size = index_size; - draw.index_buffer = index_buffer; - r600_draw_common(&draw); -} - -void r600_draw_arrays(struct pipe_context *ctx, unsigned mode, - unsigned start, unsigned count) -{ - struct r600_draw draw; - - draw.ctx = ctx; - draw.mode = mode; - draw.start = start; - draw.count = count; - draw.index_size = 0; - draw.index_buffer = NULL; - r600_draw_common(&draw); -} - void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { struct r600_context *rctx = r600_context(ctx); diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c index c748073b2a..3ffda87520 100644 --- a/src/gallium/drivers/rbug/rbug_context.c +++ b/src/gallium/drivers/rbug/rbug_context.c @@ -102,89 +102,6 @@ rbug_draw_block_locked(struct rbug_context *rb_pipe, int flag) } -static void -rbug_draw_arrays(struct pipe_context *_pipe, - unsigned prim, - unsigned start, - unsigned count) -{ - struct rbug_context *rb_pipe = rbug_context(_pipe); - struct pipe_context *pipe = rb_pipe->pipe; - - pipe_mutex_lock(rb_pipe->draw_mutex); - 
rbug_draw_block_locked(rb_pipe, RBUG_BLOCK_BEFORE); - - pipe->draw_arrays(pipe, - prim, - start, - count); - - rbug_draw_block_locked(rb_pipe, RBUG_BLOCK_AFTER); - pipe_mutex_unlock(rb_pipe->draw_mutex); -} - -static void -rbug_draw_elements(struct pipe_context *_pipe, - struct pipe_resource *_indexResource, - unsigned indexSize, - int indexBias, - unsigned prim, - unsigned start, - unsigned count) -{ - struct rbug_context *rb_pipe = rbug_context(_pipe); - struct rbug_resource *rb_resource = rbug_resource(_indexResource); - struct pipe_context *pipe = rb_pipe->pipe; - struct pipe_resource *indexResource = rb_resource->resource; - - pipe_mutex_lock(rb_pipe->draw_mutex); - rbug_draw_block_locked(rb_pipe, RBUG_BLOCK_BEFORE); - - pipe->draw_elements(pipe, - indexResource, - indexSize, - indexBias, - prim, - start, - count); - - rbug_draw_block_locked(rb_pipe, RBUG_BLOCK_AFTER); - pipe_mutex_unlock(rb_pipe->draw_mutex); -} - -static void -rbug_draw_range_elements(struct pipe_context *_pipe, - struct pipe_resource *_indexResource, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) -{ - struct rbug_context *rb_pipe = rbug_context(_pipe); - struct rbug_resource *rb_resource = rbug_resource(_indexResource); - struct pipe_context *pipe = rb_pipe->pipe; - struct pipe_resource *indexResource = rb_resource->resource; - - pipe_mutex_lock(rb_pipe->draw_mutex); - rbug_draw_block_locked(rb_pipe, RBUG_BLOCK_BEFORE); - - pipe->draw_range_elements(pipe, - indexResource, - indexSize, - indexBias, - minIndex, - maxIndex, - mode, - start, - count); - - rbug_draw_block_locked(rb_pipe, RBUG_BLOCK_AFTER); - pipe_mutex_unlock(rb_pipe->draw_mutex); -} - static void rbug_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info) { @@ -1072,9 +989,6 @@ rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) rb_pipe->base.draw = NULL; rb_pipe->base.destroy = rbug_destroy; - 
rb_pipe->base.draw_arrays = rbug_draw_arrays; - rb_pipe->base.draw_elements = rbug_draw_elements; - rb_pipe->base.draw_range_elements = rbug_draw_range_elements; rb_pipe->base.draw_vbo = rbug_draw_vbo; rb_pipe->base.create_query = rbug_create_query; rb_pipe->base.destroy_query = rbug_destroy_query; diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index fa1fae6f00..a7c9959b3e 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -284,11 +284,6 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->pipe.set_vertex_buffers = softpipe_set_vertex_buffers; softpipe->pipe.set_index_buffer = softpipe_set_index_buffer; - softpipe->pipe.draw_arrays = softpipe_draw_arrays; - softpipe->pipe.draw_elements = softpipe_draw_elements; - softpipe->pipe.draw_range_elements = softpipe_draw_range_elements; - softpipe->pipe.draw_arrays_instanced = softpipe_draw_arrays_instanced; - softpipe->pipe.draw_elements_instanced = softpipe_draw_elements_instanced; softpipe->pipe.draw_vbo = softpipe_draw_vbo; softpipe->pipe.draw_stream_output = softpipe_draw_stream_output; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 2855f55a0e..3a33cdef96 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -35,7 +35,6 @@ #include "pipe/p_context.h" #include "util/u_inlines.h" #include "util/u_prim.h" -#include "util/u_draw_quad.h" #include "sp_context.h" #include "sp_query.h" @@ -173,152 +172,3 @@ softpipe_draw_vbo(struct pipe_context *pipe, /* Note: leave drawing surfaces mapped */ sp->dirty_render_cache = TRUE; } - -static void -softpipe_draw_range_elements_instanced(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count, - unsigned 
startInstance, - unsigned instanceCount) -{ - struct softpipe_context *sp = softpipe_context(pipe); - struct pipe_draw_info info; - struct pipe_index_buffer saved_ib, ib; - - util_draw_init_info(&info); - info.mode = mode; - info.start = start; - info.count = count; - info.start_instance = startInstance; - info.instance_count = instanceCount; - info.index_bias = indexBias; - info.min_index = minIndex; - info.max_index = maxIndex; - - if (indexBuffer) { - info.indexed = TRUE; - - saved_ib = sp->index_buffer; - ib.buffer = indexBuffer; - ib.offset = 0; - ib.index_size = indexSize; - pipe->set_index_buffer(pipe, &ib); - } - - softpipe_draw_vbo(pipe, &info); - - if (indexBuffer) - pipe->set_index_buffer(pipe, &saved_ib); -} - - -void -softpipe_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned min_index, - unsigned max_index, - unsigned mode, unsigned start, unsigned count) -{ - softpipe_draw_range_elements_instanced(pipe, - indexBuffer, - indexSize, - indexBias, - min_index, - max_index, - mode, - start, - count, - 0, - 1); -} - - -void -softpipe_draw_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned mode, unsigned start, unsigned count) -{ - softpipe_draw_range_elements_instanced(pipe, - indexBuffer, - indexSize, - indexBias, - 0, - 0xffffffff, - mode, - start, - count, - 0, - 1); -} - -void -softpipe_draw_arrays_instanced(struct pipe_context *pipe, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount) -{ - softpipe_draw_range_elements_instanced(pipe, - NULL, - 0, - 0, - 0, - 0xffffffff, - mode, - start, - count, - startInstance, - instanceCount); -} - -void -softpipe_draw_elements_instanced(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned mode, - unsigned start, - unsigned count, - unsigned 
startInstance, - unsigned instanceCount) -{ - softpipe_draw_range_elements_instanced(pipe, - indexBuffer, - indexSize, - indexBias, - 0, - 0xffffffff, - mode, - start, - count, - startInstance, - instanceCount); -} - -void -softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count) -{ - softpipe_draw_range_elements_instanced(pipe, - NULL, - 0, - 0, - 0, - 0xffffffff, - mode, - start, - count, - 0, - 1); -} - diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index f04b0a5d31..39d204de8a 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -228,41 +228,6 @@ void softpipe_set_index_buffer(struct pipe_context *, void softpipe_update_derived( struct softpipe_context *softpipe ); -void softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count); - -void softpipe_draw_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned mode, unsigned start, unsigned count); -void -softpipe_draw_range_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned min_index, - unsigned max_index, - unsigned mode, unsigned start, unsigned count); - -void -softpipe_draw_arrays_instanced(struct pipe_context *pipe, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount); - -void -softpipe_draw_elements_instanced(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount); - void softpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info); diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c index fceaa83d70..de08bc5e56 100644 --- 
a/src/gallium/drivers/svga/svga_pipe_draw.c +++ b/src/gallium/drivers/svga/svga_pipe_draw.c @@ -226,28 +226,6 @@ svga_draw_range_elements( struct pipe_context *pipe, } -static void -svga_draw_elements( struct pipe_context *pipe, - struct pipe_resource *index_buffer, - unsigned index_size, int index_bias, - unsigned prim, unsigned start, unsigned count) -{ - svga_draw_range_elements( pipe, index_buffer, - index_size, index_bias, - 0, 0xffffffff, - prim, start, count ); -} - -static void -svga_draw_arrays( struct pipe_context *pipe, - unsigned prim, unsigned start, unsigned count) -{ - svga_draw_range_elements(pipe, NULL, 0, 0, - start, start + count - 1, - prim, - start, count); -} - static void svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { @@ -274,8 +252,5 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) void svga_init_draw_functions( struct svga_context *svga ) { - svga->pipe.draw_arrays = svga_draw_arrays; - svga->pipe.draw_elements = svga_draw_elements; - svga->pipe.draw_range_elements = svga_draw_range_elements; svga->pipe.draw_vbo = svga_draw_vbo; } diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 91c9bf0999..84e5a6a824 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -82,91 +82,6 @@ trace_surface_unwrap(struct trace_context *tr_ctx, } -static INLINE void -trace_context_draw_arrays(struct pipe_context *_pipe, - unsigned mode, unsigned start, unsigned count) -{ - struct trace_context *tr_ctx = trace_context(_pipe); - struct pipe_context *pipe = tr_ctx->pipe; - - trace_dump_call_begin("pipe_context", "draw_arrays"); - - trace_dump_arg(ptr, pipe); - trace_dump_arg(uint, mode); - trace_dump_arg(uint, start); - trace_dump_arg(uint, count); - - pipe->draw_arrays(pipe, mode, start, count); - - trace_dump_call_end(); -} - - -static INLINE void -trace_context_draw_elements(struct pipe_context *_pipe, - struct 
pipe_resource *_indexBuffer, - unsigned indexSize, int indexBias, - unsigned mode, unsigned start, unsigned count) -{ - struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_resource *tr_buf = trace_resource(_indexBuffer); - struct pipe_context *pipe = tr_ctx->pipe; - struct pipe_resource *indexBuffer = tr_buf->resource; - - trace_dump_call_begin("pipe_context", "draw_elements"); - - trace_dump_arg(ptr, pipe); - trace_dump_arg(ptr, indexBuffer); - trace_dump_arg(uint, indexSize); - trace_dump_arg(int, indexBias); - trace_dump_arg(uint, mode); - trace_dump_arg(uint, start); - trace_dump_arg(uint, count); - - pipe->draw_elements(pipe, indexBuffer, indexSize, indexBias, - mode, start, count); - - trace_dump_call_end(); -} - - -static INLINE void -trace_context_draw_range_elements(struct pipe_context *_pipe, - struct pipe_resource *_indexBuffer, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) -{ - struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_resource *tr_buf = trace_resource(_indexBuffer); - struct pipe_context *pipe = tr_ctx->pipe; - struct pipe_resource *indexBuffer = tr_buf->resource; - - trace_dump_call_begin("pipe_context", "draw_range_elements"); - - trace_dump_arg(ptr, pipe); - trace_dump_arg(ptr, indexBuffer); - trace_dump_arg(uint, indexSize); - trace_dump_arg(int, indexBias); - trace_dump_arg(uint, minIndex); - trace_dump_arg(uint, maxIndex); - trace_dump_arg(uint, mode); - trace_dump_arg(uint, start); - trace_dump_arg(uint, count); - - pipe->draw_range_elements(pipe, - indexBuffer, indexSize, indexBias, - minIndex, maxIndex, - mode, start, count); - - trace_dump_call_end(); -} - - static INLINE void trace_context_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info) @@ -1483,9 +1398,6 @@ trace_context_create(struct trace_screen *tr_scr, tr_ctx->base.screen = &tr_scr->base; tr_ctx->base.destroy = trace_context_destroy; - 
tr_ctx->base.draw_arrays = trace_context_draw_arrays; - tr_ctx->base.draw_elements = trace_context_draw_elements; - tr_ctx->base.draw_range_elements = trace_context_draw_range_elements; tr_ctx->base.draw_vbo = trace_context_draw_vbo; tr_ctx->base.create_query = trace_context_create_query; tr_ctx->base.destroy_query = trace_context_destroy_query; diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 3314b1e0e0..0579962ec6 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -61,47 +61,6 @@ struct pipe_context { * VBO drawing */ /*@{*/ - void (*draw_arrays)( struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count); - - void (*draw_elements)( struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned mode, unsigned start, unsigned count); - - void (*draw_arrays_instanced)(struct pipe_context *pipe, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount); - - void (*draw_elements_instanced)(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned mode, - unsigned start, - unsigned count, - unsigned startInstance, - unsigned instanceCount); - - /* XXX: this is (probably) a temporary entrypoint, as the range - * information should be available from the vertex_buffer state. - * Using this to quickly evaluate a specialized path in the draw - * module. 
- */ - void (*draw_range_elements)( struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count); - void (*draw_vbo)( struct pipe_context *pipe, const struct pipe_draw_info *info ); -- cgit v1.2.3 From e7f69c459af3274b9d1435c06a15b14eb40e4cc5 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Sat, 17 Jul 2010 22:00:04 +0800 Subject: gallium/docs: Document draw_vbo and set_index_buffer. Document the new unified drawing method and remove references to old ones. --- src/gallium/docs/d3d11ddi.txt | 30 ++++++++------------ src/gallium/docs/source/context.rst | 55 +++++++++++-------------------------- 2 files changed, 27 insertions(+), 58 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/docs/d3d11ddi.txt b/src/gallium/docs/d3d11ddi.txt index 0954c2926d..f8155c828b 100644 --- a/src/gallium/docs/d3d11ddi.txt +++ b/src/gallium/docs/d3d11ddi.txt @@ -66,9 +66,6 @@ Unordered access view: view supporting random read/write access (usually from co clear + Gallium supports clearing both render targets and depth/stencil with a single call -draw_range_elements - + Gallium supports indexed draw with explicit range - fence_signalled fence_finish + D3D10/D3D11 don't appear to support explicit fencing; queries can often substitute though, and flushing is supported @@ -271,31 +268,27 @@ Dispatch (D3D11 only) DispatchIndirect (D3D11 only) - Gallium does not support compute shaders -Draw -> draw_arrays +Draw -> draw_vbo ! D3D11 sets primitive modes separately with IaSetTopology: it's not obvious which is better DrawAuto -> draw_auto -DrawIndexed -> draw_elements +DrawIndexed -> draw_vbo ! 
D3D11 sets primitive modes separately with IaSetTopology: it's not obvious which is better - * may want to add a separate set_index_buffer - - Gallium lacks base vertex for indexed draw calls - + D3D11 lacks draw_range_elements functionality, which is required for OpenGL + + D3D11 lacks explicit range, which is required for OpenGL -DrawIndexedInstanced -> draw_elements_instanced +DrawIndexedInstanced -> draw_vbo ! D3D11 sets primitive modes separately with IaSetTopology: it's not obvious which is better - * may want to add a separate set_index_buffer - - Gallium lacks base vertex for indexed draw calls -DrawIndexedInstancedIndirect (D3D11 only) -> call draw_elements_instanced multiple times in software - # this allows to use an hardware buffer to specify the parameters for multiple draw_elements_instanced calls +DrawIndexedInstancedIndirect (D3D11 only) + # this allows to use an hardware buffer to specify the parameters for multiple draw_vbo calls - Gallium does not support draw call parameter buffers and indirect draw -DrawInstanced -> draw_arrays_instanced +DrawInstanced -> draw_vbo ! D3D11 sets primitive modes separately with IaSetTopology: it's not obvious which is better -DrawInstancedIndirect (D3D11 only) -> call draw_arrays_instanced multiple times in software - # this allows to use an hardware buffer to specify the parameters for multiple draw_arrays_instanced calls +DrawInstancedIndirect (D3D11 only) + # this allows to use an hardware buffer to specify the parameters for multiple draw_vbo calls - Gallium does not support draw call parameter buffers and indirect draws DsSetConstantBuffers (D3D11 only) @@ -332,10 +325,9 @@ HsSetShaderResources (D3D11 only) HsSetShaderWithIfaces (D3D11 only) - Gallium does not support hull shaders -IaSetIndexBuffer - ! Gallium passes this to the draw_elements or draw_elements_instanced calls +IaSetIndexBuffer -> set_index_buffer + Gallium supports 8-bit indices - ! 
the D3D11 interface allows index-size-unaligned byte offsets into index buffers; it's not clear whether they actually work + # the D3D11 interface allows index-size-unaligned byte offsets into the index buffer; most drivers will abort with an assertion IaSetInputLayout -> bind_vertex_elements_state diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst index 4e35a4c408..f241411a00 100644 --- a/src/gallium/docs/source/context.rst +++ b/src/gallium/docs/source/context.rst @@ -45,6 +45,7 @@ buffers, surfaces) are bound to the driver. * ``set_vertex_buffers`` +* ``set_index_buffer`` Non-CSO State ^^^^^^^^^^^^^ @@ -132,50 +133,26 @@ this surface need not be bound to the framebuffer. Drawing ^^^^^^^ -``draw_arrays`` draws a specified primitive. +``draw_vbo`` draws a specified primitive. The primitive mode and other +properties are described by ``pipe_draw_info``. -This command is equivalent to calling ``draw_arrays_instanced`` -with ``startInstance`` set to 0 and ``instanceCount`` set to 1. +The ``mode``, ``start``, and ``count`` fields of ``pipe_draw_info`` specify the +the mode of the primitive and the vertices to be fetched, in the range between +``start`` to ``start``+``count``-1, inclusive. -``draw_elements`` draws a specified primitive using an optional -index buffer. +Every instance with instanceID in the range between ``start_instance`` and +``start_instance``+``instance_count``-1, inclusive, will be drawn. -This command is equivalent to calling ``draw_elements_instanced`` -with ``startInstance`` set to 0 and ``instanceCount`` set to 1. +All vertex indices must fall inside the range given by ``min_index`` and +``max_index``. In case non-indexed draw, ``min_index`` should be set to +``start`` and ``max_index`` should be set to ``start``+``count``-1. -``draw_range_elements`` +``index_bias`` is a value added to every vertex index before fetching vertex +attributes. It does not affect ``min_index`` and ``max_index``. 
-XXX: this is (probably) a temporary entrypoint, as the range -information should be available from the vertex_buffer state. -Using this to quickly evaluate a specialized path in the draw -module. - -``draw_arrays_instanced`` draws multiple instances of the same primitive. - -This command is equivalent to calling ``draw_elements_instanced`` -with ``indexBuffer`` set to NULL and ``indexSize`` set to 0. - -``draw_elements_instanced`` draws multiple instances of the same primitive -using an optional index buffer. - -For instanceID in the range between ``startInstance`` -and ``startInstance``+``instanceCount``-1, inclusive, draw a primitive -specified by ``mode`` and sequential numbers in the range between ``start`` -and ``start``+``count``-1, inclusive. - -If ``indexBuffer`` is not NULL, it specifies an index buffer with index -byte size of ``indexSize``. The sequential numbers are used to lookup -the index buffer and the resulting indices in turn are used to fetch -vertex attributes. - -If ``indexBuffer`` is NULL, the sequential numbers are used directly -as indices to fetch vertex attributes. - -``indexBias`` is a value which is added to every index read from the index -buffer before fetching vertex attributes. - -``minIndex`` and ``maxIndex`` describe minimum and maximum index contained in -the index buffer. +If there is an index buffer bound, and ``indexed`` field is true, all vertex +indices will be looked up in the index buffer. ``min_index``, ``max_index``, +and ``index_bias`` apply after index lookup. If a given vertex element has ``instance_divisor`` set to 0, it is said it contains per-vertex data and effective vertex attribute address needs -- cgit v1.2.3 From 92f9b05499de9f0e8eda11d1a75ba7955c9cc602 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Thu, 29 Jul 2010 15:54:10 +0800 Subject: gallium: Avoid void pointer arithmetic. This fixes fdo bug #29286. 
--- src/gallium/drivers/i915/i915_context.c | 4 ++-- src/gallium/drivers/llvmpipe/lp_draw_arrays.c | 4 ++-- src/gallium/drivers/r300/r300_render.c | 2 +- src/gallium/drivers/softpipe/sp_draw_arrays.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 496efc99da..2beb9e3091 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -67,8 +67,8 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) * Map index buffer, if present */ if (info->indexed && i915->index_buffer.buffer) { - mapped_indices = i915_buffer(i915->index_buffer.buffer)->data; - mapped_indices += i915->index_buffer.offset; + char *indices = (char *) i915_buffer(i915->index_buffer.buffer)->data; + mapped_indices = (void *) (indices + i915->index_buffer.offset); } draw_set_mapped_element_buffer_range(draw, (mapped_indices) ? diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index 22c2836e22..e73b431cb4 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -69,8 +69,8 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) /* Map index buffer, if present */ if (info->indexed && lp->index_buffer.buffer) { - mapped_indices = llvmpipe_resource_data(lp->index_buffer.buffer); - mapped_indices += lp->index_buffer.offset; + char *indices = (char *) llvmpipe_resource_data(lp->index_buffer.buffer); + mapped_indices = (void *) (indices + lp->index_buffer.offset); } draw_set_mapped_element_buffer_range(draw, (mapped_indices) ? 
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index c179b07e86..35d7756584 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -692,7 +692,7 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, indices = pipe_buffer_map(pipe, r300->index_buffer.buffer, PIPE_TRANSFER_READ, &ib_transfer); if (indices) - indices += r300->index_buffer.offset; + indices = (void *) ((char *) indices + r300->index_buffer.offset); } draw_set_mapped_element_buffer_range(r300->draw, (indices) ? diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 3a33cdef96..386c8acb8c 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -139,8 +139,8 @@ softpipe_draw_vbo(struct pipe_context *pipe, /* Map index buffer, if present */ if (info->indexed && sp->index_buffer.buffer) { - mapped_indices = softpipe_resource(sp->index_buffer.buffer)->data; - mapped_indices += sp->index_buffer.offset; + char *indices = (char *) softpipe_resource(sp->index_buffer.buffer)->data; + mapped_indices = (void *) (indices + sp->index_buffer.offset); } draw_set_mapped_element_buffer_range(draw, (mapped_indices) ? -- cgit v1.2.3 From d4d62b6178c43cff9a0de522c4cf512109cb4b03 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Thu, 29 Jul 2010 16:04:28 +0800 Subject: graw/tests: Use pipe_context::draw_vbo. The other drawing variants such as draw_arrays or draw_elements_instanced were removed. This fixes fdo bug #29287. 
--- src/gallium/tests/graw/fs-test.c | 3 ++- src/gallium/tests/graw/gs-test.c | 5 +++-- src/gallium/tests/graw/quad-tex.c | 3 ++- src/gallium/tests/graw/tri-gs.c | 3 ++- src/gallium/tests/graw/tri-instanced.c | 25 +++++++++++++++---------- src/gallium/tests/graw/tri.c | 3 ++- src/gallium/tests/graw/vs-test.c | 3 ++- 7 files changed, 28 insertions(+), 17 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/tests/graw/fs-test.c b/src/gallium/tests/graw/fs-test.c index dea087357d..53fbb744d8 100644 --- a/src/gallium/tests/graw/fs-test.c +++ b/src/gallium/tests/graw/fs-test.c @@ -13,6 +13,7 @@ #include "util/u_debug.h" /* debug_dump_surface_bmp() */ #include "util/u_inlines.h" #include "util/u_memory.h" /* Offset() */ +#include "util/u_draw_quad.h" #include "util/u_box.h" static const char *filename = NULL; @@ -275,7 +276,7 @@ static void draw( void ) float clear_color[4] = {.1,.3,.5,0}; ctx->clear(ctx, PIPE_CLEAR_COLOR, clear_color, 0, 0); - ctx->draw_arrays(ctx, PIPE_PRIM_TRIANGLES, 0, 3); + util_draw_arrays(ctx, PIPE_PRIM_TRIANGLES, 0, 3); ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); #if 0 diff --git a/src/gallium/tests/graw/gs-test.c b/src/gallium/tests/graw/gs-test.c index 3087d446fc..62714900bd 100644 --- a/src/gallium/tests/graw/gs-test.c +++ b/src/gallium/tests/graw/gs-test.c @@ -13,6 +13,7 @@ #include "util/u_debug.h" /* debug_dump_surface_bmp() */ #include "util/u_inlines.h" #include "util/u_memory.h" /* Offset() */ +#include "util/u_draw_quad.h" #include "util/u_box.h" static const char *filename = NULL; @@ -336,9 +337,9 @@ static void draw( void ) ctx->clear(ctx, PIPE_CLEAR_COLOR, clear_color, 0, 0); if (draw_strip) - ctx->draw_arrays(ctx, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); + util_draw_arrays(ctx, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); else - ctx->draw_arrays(ctx, PIPE_PRIM_TRIANGLES, 0, 3); + util_draw_arrays(ctx, PIPE_PRIM_TRIANGLES, 0, 3); ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); diff --git a/src/gallium/tests/graw/quad-tex.c 
b/src/gallium/tests/graw/quad-tex.c index 6a0a2ba295..c50ef12ab5 100644 --- a/src/gallium/tests/graw/quad-tex.c +++ b/src/gallium/tests/graw/quad-tex.c @@ -12,6 +12,7 @@ #include "util/u_debug.h" /* debug_dump_surface_bmp() */ #include "util/u_inlines.h" #include "util/u_memory.h" /* Offset() */ +#include "util/u_draw_quad.h" #include "util/u_box.h" enum pipe_format formats[] = { @@ -146,7 +147,7 @@ static void draw( void ) float clear_color[4] = {.5,.5,.5,1}; ctx->clear(ctx, PIPE_CLEAR_COLOR, clear_color, 0, 0); - ctx->draw_arrays(ctx, PIPE_PRIM_QUADS, 0, 4); + util_draw_arrays(ctx, PIPE_PRIM_QUADS, 0, 4); ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); #if 0 diff --git a/src/gallium/tests/graw/tri-gs.c b/src/gallium/tests/graw/tri-gs.c index d187505f8d..152ae408eb 100644 --- a/src/gallium/tests/graw/tri-gs.c +++ b/src/gallium/tests/graw/tri-gs.c @@ -10,6 +10,7 @@ #include "util/u_debug.h" /* debug_dump_surface_bmp() */ #include "util/u_memory.h" /* Offset() */ +#include "util/u_draw_quad.h" enum pipe_format formats[] = { PIPE_FORMAT_R8G8B8A8_UNORM, @@ -161,7 +162,7 @@ static void draw( void ) float clear_color[4] = {1,0,1,1}; ctx->clear(ctx, PIPE_CLEAR_COLOR, clear_color, 0, 0); - ctx->draw_arrays(ctx, PIPE_PRIM_TRIANGLES, 0, 3); + util_draw_arrays(ctx, PIPE_PRIM_TRIANGLES, 0, 3); ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); screen->flush_frontbuffer(screen, surf, window); diff --git a/src/gallium/tests/graw/tri-instanced.c b/src/gallium/tests/graw/tri-instanced.c index 30e205f143..8859f745fd 100644 --- a/src/gallium/tests/graw/tri-instanced.c +++ b/src/gallium/tests/graw/tri-instanced.c @@ -13,6 +13,7 @@ #include "util/u_debug.h" /* debug_dump_surface_bmp() */ #include "util/u_memory.h" /* Offset() */ +#include "util/u_draw_quad.h" enum pipe_format formats[] = { @@ -27,7 +28,6 @@ static const int HEIGHT = 300; static struct pipe_screen *screen = NULL; static struct pipe_context *ctx = NULL; static struct pipe_surface *surf = NULL; -static struct pipe_resource 
*indexBuffer = NULL; static void *window = NULL; struct vertex { @@ -105,6 +105,7 @@ static void set_vertices( void ) { struct pipe_vertex_element ve[3]; struct pipe_vertex_buffer vbuf[2]; + struct pipe_index_buffer ibuf; void *handle; memset(ve, 0, sizeof ve); @@ -151,11 +152,14 @@ static void set_vertices( void ) ctx->set_vertex_buffers(ctx, 2, vbuf); /* index data */ - indexBuffer = screen->user_buffer_create(screen, + ibuf.buffer = screen->user_buffer_create(screen, indices, sizeof(indices), PIPE_BIND_VERTEX_BUFFER); + ibuf.offset = 0; + ibuf.index_size = 2; + ctx->set_index_buffer(ctx, &ibuf); } @@ -195,18 +199,19 @@ static void set_fragment_shader( void ) static void draw( void ) { float clear_color[4] = {1,0,1,1}; + struct pipe_draw_info info; ctx->clear(ctx, PIPE_CLEAR_COLOR, clear_color, 0, 0); + util_draw_init_info(&info); + info.indexed = (draw_elements != 0); + info.mode = PIPE_PRIM_TRIANGLES; + info.start = 0; + info.count = 3; /* draw NUM_INST triangles */ - if (draw_elements) - ctx->draw_elements_instanced(ctx, indexBuffer, 2, - 0, /* indexBias */ - PIPE_PRIM_TRIANGLES, - 0, 3, /* start, count */ - 0, NUM_INST); /* startInst, instCount */ - else - ctx->draw_arrays_instanced(ctx, PIPE_PRIM_TRIANGLES, 0, 3, 0, NUM_INST); + info.instance_count = NUM_INST; + + ctx->draw_vbo(ctx, &info); ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); diff --git a/src/gallium/tests/graw/tri.c b/src/gallium/tests/graw/tri.c index 80377f526d..4dbd2c062a 100644 --- a/src/gallium/tests/graw/tri.c +++ b/src/gallium/tests/graw/tri.c @@ -10,6 +10,7 @@ #include "util/u_debug.h" /* debug_dump_surface_bmp() */ #include "util/u_memory.h" /* Offset() */ +#include "util/u_draw_quad.h" enum pipe_format formats[] = { PIPE_FORMAT_R8G8B8A8_UNORM, @@ -134,7 +135,7 @@ static void draw( void ) float clear_color[4] = {1,0,1,1}; ctx->clear(ctx, PIPE_CLEAR_COLOR, clear_color, 0, 0); - ctx->draw_arrays(ctx, PIPE_PRIM_TRIANGLES, 0, 3); + util_draw_arrays(ctx, PIPE_PRIM_TRIANGLES, 0, 3); 
ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); #if 0 diff --git a/src/gallium/tests/graw/vs-test.c b/src/gallium/tests/graw/vs-test.c index 7f93db42c0..e1cd814bf7 100644 --- a/src/gallium/tests/graw/vs-test.c +++ b/src/gallium/tests/graw/vs-test.c @@ -14,6 +14,7 @@ #include "util/u_debug.h" /* debug_dump_surface_bmp() */ #include "util/u_inlines.h" #include "util/u_memory.h" /* Offset() */ +#include "util/u_draw_quad.h" #include "util/u_box.h" static const char *filename = NULL; @@ -226,7 +227,7 @@ static void draw( void ) float clear_color[4] = {.1,.3,.5,0}; ctx->clear(ctx, PIPE_CLEAR_COLOR, clear_color, 0, 0); - ctx->draw_arrays(ctx, PIPE_PRIM_POINTS, 0, Elements(vertices)); + util_draw_arrays(ctx, PIPE_PRIM_POINTS, 0, Elements(vertices)); ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); #if 0 -- cgit v1.2.3 From ab25c1597ddd60f148cafdbb2ea1e7562c9f9f69 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Thu, 29 Jul 2010 17:39:27 +0800 Subject: st/python: Adapt to interface change. This is only compile tested. 
--- src/gallium/state_trackers/python/p_context.i | 39 +++++++++++++-------------- 1 file changed, 18 insertions(+), 21 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/state_trackers/python/p_context.i b/src/gallium/state_trackers/python/p_context.i index cf0144b5dc..40c4603fb9 100644 --- a/src/gallium/state_trackers/python/p_context.i +++ b/src/gallium/state_trackers/python/p_context.i @@ -289,6 +289,20 @@ struct st_context { $self->vertex_buffers); } + void set_index_buffer(unsigned index_size, + unsigned offset, + struct pipe_resource *buffer) + { + struct pipe_index_buffer ib; + + memset(&ib, 0, sizeof(ib)); + ib.index_size = index_size; + ib.offset = offset; + ib.buffer = buffer; + + $self->pipe->set_index_buffer($self->pipe, &ib); + } + void set_vertex_element(unsigned index, const struct pipe_vertex_element *element) { @@ -308,29 +322,12 @@ struct st_context { */ void draw_arrays(unsigned mode, unsigned start, unsigned count) { - $self->pipe->draw_arrays($self->pipe, mode, start, count); - } - - void draw_elements( struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned mode, unsigned start, unsigned count) - { - $self->pipe->draw_elements($self->pipe, - indexBuffer, - indexSize, - indexBias, - mode, start, count); + util_draw_arrays($self->pipe, mode, start, count); } - void draw_range_elements( struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned minIndex, unsigned maxIndex, - unsigned mode, unsigned start, unsigned count) + void draw_vbo(const struct pipe_draw_info *info) { - $self->pipe->draw_range_elements($self->pipe, - indexBuffer, indexSize, indexBias, - minIndex, maxIndex, - mode, start, count); + $self->pipe->draw_vbo($self->pipe, info); } void draw_vertices(unsigned prim, @@ -382,7 +379,7 @@ struct st_context { pipe->set_vertex_buffers(pipe, 1, &vbuffer); /* draw */ - pipe->draw_arrays(pipe, prim, 0, num_verts); + util_draw_arrays(pipe, prim, 0, num_verts); 
cso_restore_vertex_elements($self->cso); -- cgit v1.2.3 From 38f5b1bc38fde041162e90e0ba3955ac661e1abf Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 22 Jul 2010 16:53:39 +0100 Subject: util: Don't include xmmintrin.h. Unnecessary. --- src/gallium/auxiliary/util/u_sse.h | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h index e2a8491e62..6145e34aa3 100644 --- a/src/gallium/auxiliary/util/u_sse.h +++ b/src/gallium/auxiliary/util/u_sse.h @@ -41,7 +41,6 @@ #if defined(PIPE_ARCH_SSE) -#include <xmmintrin.h> #include -- cgit v1.2.3 From e3d2ebac115f7b7899664fefc2652fb829acfa27 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 22 Jul 2010 18:37:46 +0100 Subject: llvmpipe: Avoid corrupting the FPU stack with MMX instructions on 32bit OSes. Unfortunately LLVM doesn't emit EMMS itself, and there is no easy/effective way to disable MMX. http://llvm.org/bugs/show_bug.cgi?id=3287 --- src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 24 ++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_state_fs.c | 5 +++++ 2 files changed, 29 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 5a9488b5f7..072408b268 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #include "pipe/p_config.h" #include "util/u_debug.h" @@ -141,4 +142,27 @@ lp_set_target_options(void) #if 0 llvm::UnsafeFPMath = true; #endif + +#if 0 + /* + * LLVM will generate MMX instructions for vectors <= 64 bits, leading to + * inefficient code, and in 32bit systems, to the corruption of the FPU + * stack given that it expects the user to generate the EMMS instructions.
+ * + * See also: + * - http://llvm.org/bugs/show_bug.cgi?id=3287 + * - http://l4.me.uk/post/2009/06/07/llvm-wrinkle-3-configuration-what-configuration/ + * + * XXX: Unfortunately this is not working. + */ + static boolean first = FALSE; + if (first) { + static const char* options[] = { + "prog", + "-disable-mmx" + }; + llvm::cl::ParseCommandLineOptions(2, const_cast<char**>(options)); + first = FALSE; + } +#endif } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 5953d690a4..dbcc286417 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -676,6 +676,11 @@ generate_fragment(struct llvmpipe_context *lp, color_ptr); } +#ifdef PIPE_ARCH_X86 + /* Avoid corrupting the FPU stack on 32bit OSes. */ + lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); +#endif + LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); -- cgit v1.2.3 From 111902f2c47377f7d7ea41af6a2a29a087350f17 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 26 Jul 2010 12:16:45 +0100 Subject: draw: Also emit EMMS on generated LLVM IR. --- src/gallium/auxiliary/draw/draw_llvm.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 19f96c37ab..48489e5f6f 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -37,6 +37,7 @@ #include "gallivm/lp_bld_debug.h" #include "gallivm/lp_bld_tgsi.h" #include "gallivm/lp_bld_printf.h" +#include "gallivm/lp_bld_intr.h" #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_dump.h" @@ -793,6 +794,11 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) sampler->destroy(sampler); +#ifdef PIPE_ARCH_X86 + /* Avoid corrupting the FPU stack on 32bit OSes.
*/ + lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); +#endif + LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); @@ -963,6 +969,11 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian sampler->destroy(sampler); +#ifdef PIPE_ARCH_X86 + /* Avoid corrupting the FPU stack on 32bit OSes. */ + lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); +#endif + LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); -- cgit v1.2.3 From bdaa8be5b7dce396dd98f05e7ad66d48633cc4cb Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 20 Jul 2010 13:40:01 +0100 Subject: scons: Use the current python executable for code generation. Less susceptible to be broken. --- src/gallium/auxiliary/SConscript | 4 ++-- src/gallium/drivers/llvmpipe/SConscript | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 72a16617db..8381ae5b3e 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -34,14 +34,14 @@ env.CodeGenerate( target = 'util/u_format_table.c', script = '#src/gallium/auxiliary/util/u_format_table.py', source = ['#src/gallium/auxiliary/util/u_format.csv'], - command = 'python $SCRIPT $SOURCE > $TARGET' + command = python_cmd + ' $SCRIPT $SOURCE > $TARGET' ) env.CodeGenerate( target = 'util/u_half.c', script = 'util/u_half.py', source = [], - command = 'python $SCRIPT > $TARGET' + command = python_cmd + ' $SCRIPT > $TARGET' ) env.Depends('util/u_format_table.c', [ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index fd6ba1561e..5583fca38e 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -1,3 +1,4 @@ +from sys import executable as python_cmd import distutils.version Import('*') @@ -16,7 +17,7 @@ env.CodeGenerate( target = 'lp_tile_soa.c', script = 
'lp_tile_soa.py', source = ['#src/gallium/auxiliary/util/u_format.csv'], - command = 'python $SCRIPT $SOURCE > $TARGET' + command = python_cmd + ' $SCRIPT $SOURCE > $TARGET' ) # XXX: Our dependency scanner only finds depended modules in relative dirs. -- cgit v1.2.3 From a258701cd949d45041ed571ca08fedc40de2cf69 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 16 Jul 2010 12:50:01 +0100 Subject: util: add uint version of pack_z_stencil Useful for packing mask values. --- src/gallium/auxiliary/util/u_pack_color.h | 47 +++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 3ebef9fb74..5f113f742b 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -425,6 +425,53 @@ util_pack_color(const float rgba[4], enum pipe_format format, union util_color * } } +/* Integer versions of util_pack_z and util_pack_z_stencil - useful for + * constructing clear masks. 
+ */ +static INLINE uint +util_pack_uint_z(enum pipe_format format, unsigned z) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + return z & 0xffff; + case PIPE_FORMAT_Z32_UNORM: + case PIPE_FORMAT_Z32_FLOAT: + return z; + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + case PIPE_FORMAT_Z24X8_UNORM: + return z & 0xffffff; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + return (z & 0xffffff) << 8; + case PIPE_FORMAT_S8_USCALED: + return 0; + default: + debug_print_format("gallium: unhandled format in util_pack_z()", format); + assert(0); + return 0; + } +} + +static INLINE uint +util_pack_uint_z_stencil(enum pipe_format format, double z, uint s) +{ + unsigned packed = util_pack_uint_z(format, z); + + s &= 0xff; + + switch (format) { + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + return packed | (s << 24); + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return packed | s; + case PIPE_FORMAT_S8_USCALED: + return packed | s; + default: + return packed; + } +} + + /** * Note: it's assumed that z is in [0,1] -- cgit v1.2.3 From 5f90e76c54bbf4456c977b3cbca450d7a570179e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Jul 2010 19:18:48 +0100 Subject: gallivm: fix lp_build_sample_offset() crash when indexing a 1-D texture If y==NULL and y_stride==NULL it means the texture is 1D. Return zero for out_i and the offset instead of garbage. 
--- src/gallium/auxiliary/gallivm/lp_bld_sample.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 0fd014ab9b..655c4fb901 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -179,6 +179,9 @@ lp_build_sample_offset(struct lp_build_context *bld, LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride); offset = lp_build_add(bld, offset, y_offset); } + else { + *out_j = bld->zero; + } if (z && z_stride) { LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride); -- cgit v1.2.3 From 02da55676bd483df5e8540e079f53c7f41178025 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 29 Jul 2010 19:38:02 +0100 Subject: Revert "gallivm: fix lp_build_sample_offset() crash when indexing a 1-D texture" This reverts commit 5f90e76c54bbf4456c977b3cbca450d7a570179e. Bad cherry-pick. --- src/gallium/auxiliary/gallivm/lp_bld_sample.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 655c4fb901..0fd014ab9b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -179,9 +179,6 @@ lp_build_sample_offset(struct lp_build_context *bld, LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride); offset = lp_build_add(bld, offset, y_offset); } - else { - *out_j = bld->zero; - } if (z && z_stride) { LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride); -- cgit v1.2.3 From 8f3fe7e2f0a3ce1a5c45fd204b0105f3b501e641 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Jul 2010 12:03:33 -0600 Subject: gallivm: added lp_build_assert() function to make assertions in LLVM code --- src/gallium/auxiliary/Makefile | 1 + src/gallium/auxiliary/SConscript | 1 + src/gallium/auxiliary/gallivm/lp_bld_assert.c | 101 ++++++++++++++++++++++++++ 
src/gallium/auxiliary/gallivm/lp_bld_assert.h | 41 +++++++++++ 4 files changed, 144 insertions(+) create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_assert.c create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_assert.h (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index dcebab7c0f..843b72bc38 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -149,6 +149,7 @@ C_SOURCES = \ GALLIVM_SOURCES = \ gallivm/lp_bld_arit.c \ + gallivm/lp_bld_assert.c \ gallivm/lp_bld_const.c \ gallivm/lp_bld_conv.c \ gallivm/lp_bld_debug.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 8381ae5b3e..1f09198721 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -198,6 +198,7 @@ source = [ if env['llvm']: source += [ 'gallivm/lp_bld_arit.c', + 'gallivm/lp_bld_assert.c', 'gallivm/lp_bld_const.c', 'gallivm/lp_bld_conv.c', 'gallivm/lp_bld_debug.c', diff --git a/src/gallium/auxiliary/gallivm/lp_bld_assert.c b/src/gallium/auxiliary/gallivm/lp_bld_assert.c new file mode 100644 index 0000000000..f2ebd868a8 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_assert.c @@ -0,0 +1,101 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "lp_bld_assert.h" +#include "lp_bld_init.h" +#include "lp_bld_printf.h" + + +/** + * A call to lp_build_assert() will build a function call to this function. + */ +static void +lp_assert(int condition, const char *msg) +{ + if (!condition) { + debug_printf("LLVM assertion '%s' failed!\n", msg); + assert(condition); + } +} + + + +/** + * lp_build_assert. + * + * Build an assertion in LLVM IR by building a function call to the + * lp_assert() function above. + * + * \param condition should be an 'i1' or 'i32' value + * \param msg a string to print if the assertion fails. 
+ */ +LLVMValueRef +lp_build_assert(LLVMBuilderRef builder, LLVMValueRef condition, + const char *msg) +{ + LLVMModuleRef module; + LLVMTypeRef arg_types[2]; + LLVMValueRef msg_string, assert_func, params[2], r; + + module = LLVMGetGlobalParent(LLVMGetBasicBlockParent( + LLVMGetInsertBlock(builder))); + + msg_string = lp_build_const_string_variable(module, msg, strlen(msg) + 1); + + arg_types[0] = LLVMInt32Type(); + arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0); + + /* lookup the lp_assert function */ + assert_func = LLVMGetNamedFunction(module, "lp_assert"); + + /* Create the assertion function if not found */ + if (!assert_func) { + LLVMTypeRef func_type = + LLVMFunctionType(LLVMVoidType(), arg_types, 2, 0); + + assert_func = LLVMAddFunction(module, "lp_assert", func_type); + LLVMSetFunctionCallConv(assert_func, LLVMCCallConv); + LLVMSetLinkage(assert_func, LLVMExternalLinkage); + LLVMAddGlobalMapping(lp_build_engine, assert_func, + func_to_pointer((func_pointer)lp_assert)); + } + assert(assert_func); + + /* build function call param list */ + params[0] = LLVMBuildZExt(builder, condition, arg_types[0], ""); + params[1] = LLVMBuildBitCast(builder, msg_string, arg_types[1], ""); + + /* check arg types */ + assert(LLVMTypeOf(params[0]) == arg_types[0]); + assert(LLVMTypeOf(params[1]) == arg_types[1]); + + r = LLVMBuildCall(builder, assert_func, params, 2, ""); + + return r; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_assert.h b/src/gallium/auxiliary/gallivm/lp_bld_assert.h new file mode 100644 index 0000000000..ddd879dc2c --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_assert.h @@ -0,0 +1,41 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef LP_BLD_ASSERT_H +#define LP_BLD_ASSERT_H + + +#include "lp_bld.h" + + +LLVMValueRef +lp_build_assert(LLVMBuilderRef builder, LLVMValueRef condition, + const char *msg); + + +#endif + -- cgit v1.2.3 From d05cb9f0187984e461b41eb1ba6ca2adf0593c74 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Jul 2010 12:49:12 -0600 Subject: llvmpipe: don't call LLVMCreateJITCompiler() twice Fixes a failed assertion with LLVM 2.6: ::JITResolver::JITResolver(llvm::JIT&): Assertion `TheJITResolver == 0&& "Multiple JIT resolvers?"' failed. Though, not everyone seems to experience this problem. 
--- src/gallium/drivers/llvmpipe/lp_test_blend.c | 14 ++------------ src/gallium/drivers/llvmpipe/lp_test_conv.c | 14 ++------------ src/gallium/drivers/llvmpipe/lp_test_printf.c | 6 ++++++ src/gallium/drivers/llvmpipe/lp_test_round.c | 12 ++---------- src/gallium/drivers/llvmpipe/lp_test_sincos.c | 13 ++----------- 5 files changed, 14 insertions(+), 45 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 0c95555655..d0389f0cb0 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -37,6 +37,7 @@ */ +#include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_type.h" #include "gallivm/lp_bld_debug.h" #include "lp_bld_blend.h" @@ -485,8 +486,7 @@ test_one(unsigned verbose, { LLVMModuleRef module = NULL; LLVMValueRef func = NULL; - LLVMExecutionEngineRef engine = NULL; - LLVMModuleProviderRef provider = NULL; + LLVMExecutionEngineRef engine = lp_build_engine; LLVMPassManagerRef pass = NULL; char *error = NULL; blend_test_ptr_t blend_test_ptr; @@ -510,15 +510,6 @@ test_one(unsigned verbose, } LLVMDisposeMessage(error); - provider = LLVMCreateModuleProviderForExistingModule(module); - if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { - if(verbose < 1) - dump_blend_type(stderr, blend, mode, type); - fprintf(stderr, "%s\n", error); - LLVMDisposeMessage(error); - abort(); - } - #if 0 pass = LLVMCreatePassManager(); LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); @@ -735,7 +726,6 @@ test_one(unsigned verbose, LLVMFreeMachineCodeForFunction(engine, func); - LLVMDisposeExecutionEngine(engine); if(pass) LLVMDisposePassManager(pass); diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index cf41b40581..3ba42bf11a 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -35,6 +35,7 @@ #include 
"util/u_pointer.h" +#include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_type.h" #include "gallivm/lp_bld_const.h" #include "gallivm/lp_bld_conv.h" @@ -152,8 +153,7 @@ test_one(unsigned verbose, { LLVMModuleRef module = NULL; LLVMValueRef func = NULL; - LLVMExecutionEngineRef engine = NULL; - LLVMModuleProviderRef provider = NULL; + LLVMExecutionEngineRef engine = lp_build_engine; LLVMPassManagerRef pass = NULL; char *error = NULL; conv_test_ptr_t conv_test_ptr; @@ -203,15 +203,6 @@ test_one(unsigned verbose, } LLVMDisposeMessage(error); - provider = LLVMCreateModuleProviderForExistingModule(module); - if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { - if(verbose < 1) - dump_conv_types(stderr, src_type, dst_type); - fprintf(stderr, "%s\n", error); - LLVMDisposeMessage(error); - abort(); - } - #if 0 pass = LLVMCreatePassManager(); LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); @@ -351,7 +342,6 @@ test_one(unsigned verbose, LLVMFreeMachineCodeForFunction(engine, func); - LLVMDisposeExecutionEngine(engine); if(pass) LLVMDisposePassManager(pass); diff --git a/src/gallium/drivers/llvmpipe/lp_test_printf.c b/src/gallium/drivers/llvmpipe/lp_test_printf.c index 21df83f9d8..62041f0301 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_printf.c +++ b/src/gallium/drivers/llvmpipe/lp_test_printf.c @@ -31,6 +31,7 @@ #include "util/u_pointer.h" #include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_printf.h" #include @@ -74,6 +75,7 @@ add_printf_test(LLVMModuleRef module) lp_build_printf(builder, "hello, world\n"); lp_build_printf(builder, "print 5 6: %d %d\n", LLVMConstInt(LLVMInt32Type(), 5, 0), LLVMConstInt(LLVMInt32Type(), 6, 0)); + LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); return func; @@ -107,11 +109,15 @@ test_printf(unsigned verbose, FILE *fp, const struct printf_test_case *testcase) LLVMDisposeMessage(error); provider = LLVMCreateModuleProviderForExistingModule(module); +#if 0 if 
(LLVMCreateJITCompiler(&engine, provider, 1, &error)) { fprintf(stderr, "%s\n", error); LLVMDisposeMessage(error); abort(); } +#else + engine = lp_build_engine; +#endif #if 0 pass = LLVMCreatePassManager(); diff --git a/src/gallium/drivers/llvmpipe/lp_test_round.c b/src/gallium/drivers/llvmpipe/lp_test_round.c index f571a81a4a..57b0ee5776 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_round.c +++ b/src/gallium/drivers/llvmpipe/lp_test_round.c @@ -31,7 +31,7 @@ #include "util/u_pointer.h" #include "gallivm/lp_bld.h" -#include "gallivm/lp_bld_printf.h" +#include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_arit.h" #include @@ -121,8 +121,7 @@ test_round(unsigned verbose, FILE *fp) { LLVMModuleRef module = NULL; LLVMValueRef test_round = NULL, test_trunc, test_floor, test_ceil; - LLVMExecutionEngineRef engine = NULL; - LLVMModuleProviderRef provider = NULL; + LLVMExecutionEngineRef engine = lp_build_engine; LLVMPassManagerRef pass = NULL; char *error = NULL; test_round_t round_func, trunc_func, floor_func, ceil_func; @@ -145,13 +144,6 @@ test_round(unsigned verbose, FILE *fp) } LLVMDisposeMessage(error); - provider = LLVMCreateModuleProviderForExistingModule(module); - if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { - fprintf(stderr, "%s\n", error); - LLVMDisposeMessage(error); - abort(); - } - #if 0 pass = LLVMCreatePassManager(); LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); diff --git a/src/gallium/drivers/llvmpipe/lp_test_sincos.c b/src/gallium/drivers/llvmpipe/lp_test_sincos.c index 1366ecddcb..e93c1b7859 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_sincos.c +++ b/src/gallium/drivers/llvmpipe/lp_test_sincos.c @@ -30,7 +30,7 @@ #include #include "gallivm/lp_bld.h" -#include "gallivm/lp_bld_printf.h" +#include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_arit.h" #include @@ -101,8 +101,7 @@ test_sincos(unsigned verbose, FILE *fp) { LLVMModuleRef module = NULL; LLVMValueRef test_sin = NULL, test_cos = NULL; - 
LLVMExecutionEngineRef engine = NULL; - LLVMModuleProviderRef provider = NULL; + LLVMExecutionEngineRef engine = lp_build_engine; LLVMPassManagerRef pass = NULL; char *error = NULL; test_sincos_t sin_func; @@ -122,13 +121,6 @@ test_sincos(unsigned verbose, FILE *fp) } LLVMDisposeMessage(error); - provider = LLVMCreateModuleProviderForExistingModule(module); - if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { - fprintf(stderr, "%s\n", error); - LLVMDisposeMessage(error); - abort(); - } - #if 0 pass = LLVMCreatePassManager(); LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); @@ -162,7 +154,6 @@ test_sincos(unsigned verbose, FILE *fp) LLVMFreeMachineCodeForFunction(engine, test_sin); LLVMFreeMachineCodeForFunction(engine, test_cos); - LLVMDisposeExecutionEngine(engine); if(pass) LLVMDisposePassManager(pass); -- cgit v1.2.3 From 330852b3b33883b8fb22ce8c67efae79e64ce273 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Jul 2010 12:51:34 -0600 Subject: llvmpipe: also test the new lp_build_assert() function --- src/gallium/drivers/llvmpipe/lp_test_printf.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/drivers/llvmpipe/lp_test_printf.c b/src/gallium/drivers/llvmpipe/lp_test_printf.c index 62041f0301..a3447bf53f 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_printf.c +++ b/src/gallium/drivers/llvmpipe/lp_test_printf.c @@ -32,6 +32,7 @@ #include "util/u_pointer.h" #include "gallivm/lp_bld.h" #include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_assert.h" #include "gallivm/lp_bld_printf.h" #include @@ -76,6 +77,9 @@ add_printf_test(LLVMModuleRef module) lp_build_printf(builder, "print 5 6: %d %d\n", LLVMConstInt(LLVMInt32Type(), 5, 0), LLVMConstInt(LLVMInt32Type(), 6, 0)); + /* Also test lp_build_assert(). This should not fail. 
*/ + lp_build_assert(builder, LLVMConstInt(LLVMInt32Type(), 1, 0), "assert(1)"); + LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); return func; -- cgit v1.2.3 From 3f4680d8e229d87e62972d0632c577873944d89d Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 29 Jul 2010 19:53:36 +0200 Subject: r300g/swtcl: fix crash after the draw_vbo merge --- src/gallium/drivers/r300/r300_render.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 35d7756584..987fbaf6a4 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -665,7 +665,7 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS]; - struct pipe_transfer *ib_transfer; + struct pipe_transfer *ib_transfer = NULL; unsigned count = info->count; int i; void* indices = NULL; -- cgit v1.2.3 From d8d7a3e0f9629a220e2394dd7c6634f2d6a93e20 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 29 Jul 2010 20:52:17 +0200 Subject: r300g/swtcl: fix crash in ETQW and minor fixups The Draw flush inside r300_flush was the culprit. Also, no need to flush Draw when changing a state since the flush is already inside swtcl_draw_vbo. 
--- src/gallium/drivers/r300/r300_flush.c | 8 -------- src/gallium/drivers/r300/r300_render.c | 22 ++++++++++++++-------- src/gallium/drivers/r300/r300_state.c | 10 ---------- 3 files changed, 14 insertions(+), 26 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index ae7b5759e7..fe182b6615 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -43,14 +43,6 @@ static void r300_flush(struct pipe_context* pipe, u_upload_flush(r300->upload_vb); u_upload_flush(r300->upload_ib); - /* We probably need to flush Draw, but we may have been called from - * within Draw. This feels kludgy, but it might be the best thing. - * - * Of course, the best thing is to kill Draw with fire. :3 */ - if (r300->draw && !r300->draw->flushing) { - draw_flush(r300->draw); - } - if (r300->dirty_hw) { r300_emit_hyperz_end(r300); r300_emit_query_end(r300); diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 987fbaf6a4..7c4294bc9f 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -681,11 +681,13 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, r300_update_derived_state(r300); for (i = 0; i < r300->vertex_buffer_count; i++) { - void* buf = pipe_buffer_map(pipe, - r300->vertex_buffer[i].buffer, - PIPE_TRANSFER_READ, - &vb_transfer[i]); - draw_set_mapped_vertex_buffer(r300->draw, i, buf); + if (r300->vertex_buffer[i].buffer) { + void *buf = pipe_buffer_map(pipe, + r300->vertex_buffer[i].buffer, + PIPE_TRANSFER_READ, + &vb_transfer[i]); + draw_set_mapped_vertex_buffer(r300->draw, i, buf); + } } if (info->indexed && r300->index_buffer.buffer) { @@ -709,9 +711,11 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, draw_flush(r300->draw); for (i = 0; i < r300->vertex_buffer_count; i++) { - pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, - vb_transfer[i]); 
- draw_set_mapped_vertex_buffer(r300->draw, i, NULL); + if (r300->vertex_buffer[i].buffer) { + pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, + vb_transfer[i]); + draw_set_mapped_vertex_buffer(r300->draw, i, NULL); + } } if (ib_transfer) { @@ -796,6 +800,8 @@ static void* r300_render_map_vertices(struct vbuf_render* render) PIPE_TRANSFER_WRITE, &r300render->vbo_transfer); + assert(r300render->vbo_ptr); + return ((uint8_t*)r300render->vbo_ptr + r300render->vbo_offset); } diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index bccd7d7859..fced77e6f1 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -446,7 +446,6 @@ static void r300_set_clip_state(struct pipe_context* pipe, r300->clip_state.dirty = TRUE; } else { - draw_flush(r300->draw); draw_set_clip_state(r300->draw, state); } } @@ -728,10 +727,6 @@ static void return; } - if (r300->draw) { - draw_flush(r300->draw); - } - /* If nr_cbufs is changed from zero to non-zero or vice versa... */ if (!!old_state->nr_cbufs != !!state->nr_cbufs) { r300->blend_state.dirty = TRUE; @@ -1096,7 +1091,6 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) boolean last_two_sided_color = r300->two_sided_color; if (r300->draw && rs) { - draw_flush(r300->draw); draw_set_rasterizer_state(r300->draw, &rs->rs_draw, state); } @@ -1385,7 +1379,6 @@ static void r300_set_viewport_state(struct pipe_context* pipe, r300->viewport = *state; if (r300->draw) { - draw_flush(r300->draw); draw_set_viewport_state(r300->draw, state); viewport->vte_control = R300_VTX_XY_FMT | R300_VTX_Z_FMT; return; @@ -1486,7 +1479,6 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, } else { /* SW TCL. 
*/ - draw_flush(r300->draw); draw_set_vertex_buffers(r300->draw, count, buffers); } @@ -1671,7 +1663,6 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe, r300->velems = velems; if (r300->draw) { - draw_flush(r300->draw); draw_set_vertex_elements(r300->draw, velems->count, velems->velem); return; } @@ -1737,7 +1728,6 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) r300->pvs_flush.dirty = TRUE; } else { - draw_flush(r300->draw); draw_bind_vertex_shader(r300->draw, (struct draw_vertex_shader*)vs->draw_vs); } -- cgit v1.2.3 From 7a73390f9126fd270d9891cd9d2bf38ef56d9b80 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Thu, 29 Jul 2010 14:51:06 -0400 Subject: r600g: mipmap early support + EX2/ABS instruction + culling Add mipmap support (demos/src/redbook/mipmap is working) Add EX2/ABS shader instruction support. Add face culling support. Misc fixes. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_asm.c | 2 ++ src/gallium/drivers/r600/r600_resource.h | 4 +-- src/gallium/drivers/r600/r600_shader.c | 16 +++++---- src/gallium/drivers/r600/r600_state.c | 56 ++++++++++++++++++++++++-------- src/gallium/drivers/r600/r600_texture.c | 24 +++++++------- src/gallium/drivers/r600/r600d.h | 40 +++++++++++++++++++++++ 6 files changed, 108 insertions(+), 34 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index e678a2fdf2..e560f65dcd 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -294,6 +294,7 @@ int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) S_SQ_ALU_WORD0_LAST(alu->last); bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | + S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) | S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) | S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) | S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) | @@ -309,6 
+310,7 @@ int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) S_SQ_ALU_WORD0_LAST(alu->last); bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | + S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) | S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 0139a3b777..bb90e76fb7 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -44,9 +44,9 @@ struct r600_resource_texture { struct r600_resource resource; unsigned long offset[PIPE_MAX_TEXTURE_LEVELS]; unsigned long pitch[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long stride[PIPE_MAX_TEXTURE_LEVELS]; unsigned long layer_size[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long stride_override; + unsigned long pitch_override; + unsigned long bpt; unsigned long size; }; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 3f1979b9cc..c61cc11e88 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -249,10 +249,6 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); return -EINVAL; } - if (i->Instruction.Saturate) { - R600_ERR("staturate unsupported\n"); - return -EINVAL; - } if (i->Instruction.Predicate) { R600_ERR("predicate unsupported\n"); return -EINVAL; @@ -507,10 +503,15 @@ static int tgsi_dst(struct r600_shader_ctx *ctx, unsigned swizzle, struct r600_bc_alu_dst *r600_dst) { + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + r600_dst->sel = tgsi_dst->Register.Index; r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; r600_dst->chan = swizzle; r600_dst->write = 1; + if (inst->Instruction.Saturate) { + r600_dst->clamp = 1; + } return 0; } 
@@ -540,6 +541,9 @@ static int tgsi_op2(struct r600_shader_ctx *ctx) case TGSI_OPCODE_SUB: alu.src[1].neg = 1; break; + case TGSI_OPCODE_ABS: + alu.src[0].abs = 1; + break; default: break; } @@ -1040,13 +1044,13 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans}, {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* gap */ {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 57879e8d8b..0191070daa 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -24,6 +24,7 @@ * Jerome Glisse */ #include +#include #include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_memory.h" @@ -649,8 +650,8 @@ static struct radeon_state *r600_cb0(struct r600_context *rctx) rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; rstate->placement[4] = RADEON_GEM_DOMAIN_GTT; rstate->nbo = 3; - pitch = rtex->pitch[level] / 8 - 1; - slice = rtex->pitch[level] * state->cbufs[0]->height / 
64 - 1; + pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; + slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[0]->height / 64 - 1; rstate->states[R600_CB0__CB_COLOR0_BASE] = 0x00000000; rstate->states[R600_CB0__CB_COLOR0_INFO] = 0x08110068; rstate->states[R600_CB0__CB_COLOR0_SIZE] = S_028060_PITCH_TILE_MAX(pitch) | @@ -666,6 +667,22 @@ static struct radeon_state *r600_cb0(struct r600_context *rctx) return rstate; } +int r600_db_format(unsigned pformat, unsigned *format) +{ + switch (pformat) { + case PIPE_FORMAT_Z24X8_UNORM: + *format = V_028010_DEPTH_X8_24; + return 0; + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + *format = V_028010_DEPTH_8_24; + return 0; + default: + *format = V_028010_DEPTH_INVALID; + R600_ERR("unsupported %d\n", pformat); + return -EINVAL; + } +} + static struct radeon_state *r600_db(struct r600_context *rctx) { struct r600_screen *rscreen = rctx->screen; @@ -674,7 +691,7 @@ static struct radeon_state *r600_db(struct r600_context *rctx) struct radeon_state *rstate; const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer; unsigned level = state->cbufs[0]->level; - unsigned pitch, slice; + unsigned pitch, slice, format; if (state->zsbuf == NULL) return NULL; @@ -689,10 +706,15 @@ static struct radeon_state *r600_db(struct r600_context *rctx) rstate->nbo = 1; rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM; level = state->zsbuf->level; - pitch = rtex->pitch[level] / 8 - 1; - slice = rtex->pitch[level] * state->zsbuf->height / 64 - 1; + pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; + slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; + if (r600_db_format(state->zsbuf->texture->format, &format)) { + radeon_state_decref(rstate); + return NULL; + } rstate->states[R600_DB__DB_DEPTH_BASE] = 0x00000000; - rstate->states[R600_DB__DB_DEPTH_INFO] = 0x00010006; + rstate->states[R600_DB__DB_DEPTH_INFO] = 0x00010000 | + S_028010_FORMAT(format); rstate->states[R600_DB__DB_DEPTH_VIEW] = 0x00000000; 
rstate->states[R600_DB__DB_PREFETCH_LIMIT] = (state->zsbuf->height / 8) -1; rstate->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) | @@ -716,7 +738,10 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) return NULL; rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000001; rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0x00000000; - rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = 0x00080000; + rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = 0x00080000 | + S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | + S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | + S_028814_FACE(!state->front_ccw); rstate->states[R600_RASTERIZER__PA_CL_VS_OUT_CNTL] = 0x00000000; rstate->states[R600_RASTERIZER__PA_CL_NANINF_CNTL] = 0x00000000; rstate->states[R600_RASTERIZER__PA_SU_POINT_SIZE] = 0x00080008; @@ -910,6 +935,11 @@ static inline unsigned r600_tex_compare(unsigned compare) } } +static INLINE u32 S_FIXED(float value, u32 frac_bits) +{ + return value * (1 << frac_bits); +} + static struct radeon_state *r600_sampler(struct r600_context *rctx, const struct pipe_sampler_state *state, unsigned id) @@ -930,9 +960,9 @@ static struct radeon_state *r600_sampler(struct r600_context *rctx, S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)); /* FIXME LOD it depends on texture base level ... 
*/ rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD1_0] = - S_03C004_MIN_LOD(0) | - S_03C004_MAX_LOD(0) | - S_03C004_LOD_BIAS(0); + S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | + S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | + S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)); rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0] = S_03C008_TYPE(1); if (radeon_state_pm4(rstate)) { radeon_state_decref(rstate); @@ -1020,7 +1050,7 @@ static struct radeon_state *r600_resource(struct r600_context *rctx, /* FIXME properly handle first level != 0 */ rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = S_038000_DIM(r600_tex_dim(view->texture->target)) | - S_038000_PITCH((tmp->pitch[0] / 8) - 1) | + S_038000_PITCH(((tmp->pitch[0] / tmp->bpt) / 8) - 1) | S_038000_TEX_WIDTH(view->texture->width0 - 1); rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = S_038004_TEX_HEIGHT(view->texture->height0 - 1) | @@ -1036,9 +1066,9 @@ static struct radeon_state *r600_resource(struct r600_context *rctx, S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) | S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) | S_038010_REQUEST_SIZE(1) | - S_038010_DST_SEL_X(r600_tex_swizzle(view->swizzle_r)) | + S_038010_DST_SEL_X(r600_tex_swizzle(view->swizzle_b)) | S_038010_DST_SEL_Y(r600_tex_swizzle(view->swizzle_g)) | - S_038010_DST_SEL_Z(r600_tex_swizzle(view->swizzle_b)) | + S_038010_DST_SEL_Z(r600_tex_swizzle(view->swizzle_r)) | S_038010_DST_SEL_W(r600_tex_swizzle(view->swizzle_a)) | S_038010_BASE_LEVEL(view->first_level); rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD5] = diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index ab20e97948..96173b0ed6 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -59,24 +59,22 @@ static unsigned long r600_texture_get_offset(struct r600_resource_texture *rtex, static void r600_setup_miptree(struct r600_screen *rscreen, 
struct r600_resource_texture *rtex) { struct pipe_resource *ptex = &rtex->resource.base.b; - unsigned long w, h, stride, size, layer_size, i, offset; + unsigned long w, h, pitch, size, layer_size, i, offset; + rtex->bpt = util_format_get_blocksize(ptex->format); for (i = 0, offset = 0; i <= ptex->last_level; i++) { w = u_minify(ptex->width0, i); h = u_minify(ptex->height0, i); - stride = align(util_format_get_stride(ptex->format, w), 32); - layer_size = stride * h; + pitch = util_format_get_stride(ptex->format, align(w, 64)); + layer_size = pitch * h; if (ptex->target == PIPE_TEXTURE_CUBE) size = layer_size * 6; else size = layer_size * u_minify(ptex->depth0, i); rtex->offset[i] = offset; rtex->layer_size[i] = layer_size; - rtex->pitch[i] = stride / util_format_get_blocksize(ptex->format); - rtex->pitch[i] += R600_TEXEL_PITCH_ALIGNMENT_MASK; - rtex->pitch[i] &= ~R600_TEXEL_PITCH_ALIGNMENT_MASK; - rtex->stride[i] = stride; - offset += align(size, 32); + rtex->pitch[i] = pitch; + offset += size; } rtex->size = offset; } @@ -183,11 +181,11 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, pipe_reference_init(&resource->base.b.reference, 1); resource->base.b.screen = screen; resource->bo = bo; - rtex->stride_override = whandle->stride; - rtex->pitch[0] = whandle->stride / util_format_get_blocksize(templ->format); - rtex->stride[0] = whandle->stride; + rtex->pitch_override = whandle->stride; + rtex->bpt = util_format_get_blocksize(templ->format); + rtex->pitch[0] = whandle->stride; rtex->offset[0] = 0; - rtex->size = align(rtex->stride[0] * templ->height0, 32); + rtex->size = align(rtex->pitch[0] * templ->height0, 64); return &resource->base.b; } @@ -216,7 +214,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, trans->transfer.sr = sr; trans->transfer.usage = usage; trans->transfer.box = *box; - trans->transfer.stride = rtex->stride[sr.level]; + trans->transfer.stride = rtex->pitch[sr.level]; trans->offset = 
r600_texture_get_offset(rtex, sr.level, box->z, sr.face); return &trans->transfer; } diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 593b95c9c7..c1acfcd29e 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -316,6 +316,46 @@ #define S_028010_ZRANGE_PRECISION(x) (((x) & 0x1) << 31) #define G_028010_ZRANGE_PRECISION(x) (((x) >> 31) & 0x1) #define C_028010_ZRANGE_PRECISION 0x7FFFFFFF +#define R_028814_PA_SU_SC_MODE_CNTL 0x028814 +#define S_028814_CULL_FRONT(x) (((x) & 0x1) << 0) +#define G_028814_CULL_FRONT(x) (((x) >> 0) & 0x1) +#define C_028814_CULL_FRONT 0xFFFFFFFE +#define S_028814_CULL_BACK(x) (((x) & 0x1) << 1) +#define G_028814_CULL_BACK(x) (((x) >> 1) & 0x1) +#define C_028814_CULL_BACK 0xFFFFFFFD +#define S_028814_FACE(x) (((x) & 0x1) << 2) +#define G_028814_FACE(x) (((x) >> 2) & 0x1) +#define C_028814_FACE 0xFFFFFFFB +#define S_028814_POLY_MODE(x) (((x) & 0x3) << 3) +#define G_028814_POLY_MODE(x) (((x) >> 3) & 0x3) +#define C_028814_POLY_MODE 0xFFFFFFE7 +#define S_028814_POLYMODE_FRONT_PTYPE(x) (((x) & 0x7) << 5) +#define G_028814_POLYMODE_FRONT_PTYPE(x) (((x) >> 5) & 0x7) +#define C_028814_POLYMODE_FRONT_PTYPE 0xFFFFFF1F +#define S_028814_POLYMODE_BACK_PTYPE(x) (((x) & 0x7) << 8) +#define G_028814_POLYMODE_BACK_PTYPE(x) (((x) >> 8) & 0x7) +#define C_028814_POLYMODE_BACK_PTYPE 0xFFFFF8FF +#define S_028814_POLY_OFFSET_FRONT_ENABLE(x) (((x) & 0x1) << 11) +#define G_028814_POLY_OFFSET_FRONT_ENABLE(x) (((x) >> 11) & 0x1) +#define C_028814_POLY_OFFSET_FRONT_ENABLE 0xFFFFF7FF +#define S_028814_POLY_OFFSET_BACK_ENABLE(x) (((x) & 0x1) << 12) +#define G_028814_POLY_OFFSET_BACK_ENABLE(x) (((x) >> 12) & 0x1) +#define C_028814_POLY_OFFSET_BACK_ENABLE 0xFFFFEFFF +#define S_028814_POLY_OFFSET_PARA_ENABLE(x) (((x) & 0x1) << 13) +#define G_028814_POLY_OFFSET_PARA_ENABLE(x) (((x) >> 13) & 0x1) +#define C_028814_POLY_OFFSET_PARA_ENABLE 0xFFFFDFFF +#define S_028814_VTX_WINDOW_OFFSET_ENABLE(x) (((x) & 
0x1) << 16) +#define G_028814_VTX_WINDOW_OFFSET_ENABLE(x) (((x) >> 16) & 0x1) +#define C_028814_VTX_WINDOW_OFFSET_ENABLE 0xFFFEFFFF +#define S_028814_PROVOKING_VTX_LAST(x) (((x) & 0x1) << 19) +#define G_028814_PROVOKING_VTX_LAST(x) (((x) >> 19) & 0x1) +#define C_028814_PROVOKING_VTX_LAST 0xFFF7FFFF +#define S_028814_PERSP_CORR_DIS(x) (((x) & 0x1) << 20) +#define G_028814_PERSP_CORR_DIS(x) (((x) >> 20) & 0x1) +#define C_028814_PERSP_CORR_DIS 0xFFEFFFFF +#define S_028814_MULTI_PRIM_IB_ENA(x) (((x) & 0x1) << 21) +#define G_028814_MULTI_PRIM_IB_ENA(x) (((x) >> 21) & 0x1) +#define C_028814_MULTI_PRIM_IB_ENA 0xFFDFFFFF #define R_028000_DB_DEPTH_SIZE 0x028000 #define S_028000_PITCH_TILE_MAX(x) (((x) & 0x3FF) << 0) #define G_028000_PITCH_TILE_MAX(x) (((x) >> 0) & 0x3FF) -- cgit v1.2.3 From d1f38baa01b8881232abfe79721296c0cf8c19a5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Jul 2010 13:26:42 -0600 Subject: llvmpipe: silence warnings in lp_test_sincos.c --- src/gallium/drivers/llvmpipe/lp_test_sincos.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/llvmpipe/lp_test_sincos.c b/src/gallium/drivers/llvmpipe/lp_test_sincos.c index e93c1b7859..7ab357f162 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_sincos.c +++ b/src/gallium/drivers/llvmpipe/lp_test_sincos.c @@ -32,6 +32,7 @@ #include "gallivm/lp_bld.h" #include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_arit.h" +#include "util/u_pointer.h" #include #include @@ -136,8 +137,8 @@ test_sincos(unsigned verbose, FILE *fp) (void)pass; #endif - sin_func = (test_sincos_t)LLVMGetPointerToGlobal(engine, test_sin); - cos_func = (test_sincos_t)LLVMGetPointerToGlobal(engine, test_cos); + sin_func = (test_sincos_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_sin)); + cos_func = (test_sincos_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_cos)); memset(unpacked, 0, sizeof unpacked); -- cgit v1.2.3 From 
8a2933f3663177f32f5ee45bb696463b8549dcbb Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Jul 2010 13:44:44 -0600 Subject: draw: add vertex buffer offset in draw_print_arrays() --- src/gallium/auxiliary/draw/draw_pt.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 92d4113b4c..adef26a167 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -259,6 +259,7 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count) for (j = 0; j < draw->pt.nr_vertex_elements; j++) { uint buf = draw->pt.vertex_element[j].vertex_buffer_index; ubyte *ptr = (ubyte *) draw->pt.user.vbuffer[buf]; + ptr += draw->pt.vertex_buffer[buf].buffer_offset; ptr += draw->pt.vertex_buffer[buf].stride * ii; ptr += draw->pt.vertex_element[j].src_offset; -- cgit v1.2.3 From ba2cc3b8e6ad161181b67fd2575c6bc768584d23 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Jul 2010 13:49:21 -0600 Subject: gallium: implement bounds checking for constant buffers Plumb the constant buffer sizes down into the tgsi interpreter where we can do bounds checking. Optional debug code warns upon out-of-bounds reading. Plus add a few other assertions in the TGSI interpreter. 
--- src/gallium/auxiliary/draw/draw_context.c | 11 ++++- src/gallium/auxiliary/draw/draw_gs.c | 13 ++++-- src/gallium/auxiliary/draw/draw_gs.h | 1 + src/gallium/auxiliary/draw/draw_private.h | 7 ++- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 4 ++ .../draw/draw_pt_fetch_shade_pipeline_llvm.c | 1 + src/gallium/auxiliary/draw/draw_vs.c | 16 ++++++- src/gallium/auxiliary/draw/draw_vs.h | 3 +- src/gallium/auxiliary/draw/draw_vs_exec.c | 8 ++-- src/gallium/auxiliary/draw/draw_vs_llvm.c | 1 + src/gallium/auxiliary/draw/draw_vs_varient.c | 6 ++- src/gallium/auxiliary/tgsi/tgsi_exec.c | 53 +++++++++++++++++++--- src/gallium/auxiliary/tgsi/tgsi_exec.h | 10 ++++ src/gallium/drivers/softpipe/sp_context.h | 1 + src/gallium/drivers/softpipe/sp_quad_fs.c | 7 +-- src/gallium/drivers/softpipe/sp_state_fs.c | 2 + 16 files changed, 117 insertions(+), 27 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index c127f74188..995b675b9a 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -288,12 +288,19 @@ draw_set_mapped_constant_buffer(struct draw_context *draw, shader_type == PIPE_SHADER_GEOMETRY); debug_assert(slot < PIPE_MAX_CONSTANT_BUFFERS); - if (shader_type == PIPE_SHADER_VERTEX) { + switch (shader_type) { + case PIPE_SHADER_VERTEX: draw->pt.user.vs_constants[slot] = buffer; + draw->pt.user.vs_constants_size[slot] = size; draw_vs_set_constants(draw, slot, buffer, size); - } else if (shader_type == PIPE_SHADER_GEOMETRY) { + break; + case PIPE_SHADER_GEOMETRY: draw->pt.user.gs_constants[slot] = buffer; + draw->pt.user.gs_constants_size[slot] = size; draw_gs_set_constants(draw, slot, buffer, size); + break; + default: + assert(0 && "invalid shader type in draw_set_mapped_constant_buffer"); } } diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index 0c590f936b..a36321d910 100644 --- 
a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -75,7 +75,7 @@ draw_gs_set_constants(struct draw_context *draw, const void *constants, unsigned size) { - /* noop */ + debug_printf("draw_gs_set_constants() not implemented yet!\n"); } @@ -394,8 +394,13 @@ static void gs_tri_adj(struct draw_geometry_shader *shader, const ushort *elts = input_prims->elts; #include "draw_gs_tmp.h" + +/** + * Execute geometry shader using TGSI interpreter. + */ int draw_geometry_shader_run(struct draw_geometry_shader *shader, const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS], const struct draw_vertex_info *input_verts, const struct draw_prim_info *input_prim, struct draw_vertex_info *output_verts, @@ -405,7 +410,6 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader, unsigned input_stride = input_verts->vertex_size; unsigned vertex_size = input_verts->vertex_size; struct tgsi_exec_machine *machine = shader->machine; - unsigned int i; unsigned num_input_verts = input_prim->linear ? 
input_verts->count : input_prim->count; @@ -447,9 +451,8 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader, } shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned)); - for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { - machine->Consts[i] = constants[i]; - } + tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS, + constants, constants_size); if (input_prim->linear) gs_run(shader, input_prim, input_verts, diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h index 06f4b822a2..67bc1aa73f 100644 --- a/src/gallium/auxiliary/draw/draw_gs.h +++ b/src/gallium/auxiliary/draw/draw_gs.h @@ -73,6 +73,7 @@ struct draw_geometry_shader { */ int draw_geometry_shader_run(struct draw_geometry_shader *shader, const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS], const struct draw_vertex_info *input_verts, const struct draw_prim_info *input_prim, struct draw_vertex_info *output_verts, diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 058aeedc17..397d4bf653 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -163,9 +163,11 @@ struct draw_context /** vertex arrays */ const void *vbuffer[PIPE_MAX_ATTRIBS]; - /** constant buffer (for vertex/geometry shader) */ + /** constant buffers (for vertex/geometry shader) */ const void *vs_constants[PIPE_MAX_CONSTANT_BUFFERS]; + unsigned vs_constants_size[PIPE_MAX_CONSTANT_BUFFERS]; const void *gs_constants[PIPE_MAX_CONSTANT_BUFFERS]; + unsigned gs_constants_size[PIPE_MAX_CONSTANT_BUFFERS]; } user; boolean test_fse; /* enable FSE even though its not correct (eg for softpipe) */ @@ -198,6 +200,7 @@ struct draw_context struct pipe_viewport_state viewport; boolean identity_viewport; + /** Vertex shader state */ struct { struct draw_vertex_shader *vertex_shader; uint num_vs_outputs; /**< convenience, from 
vertex_shader */ @@ -227,6 +230,7 @@ struct draw_context struct translate_cache *emit_cache; } vs; + /** Geometry shader state */ struct { struct draw_geometry_shader *geometry_shader; uint num_gs_outputs; /**< convenience, from geometry_shader */ @@ -239,6 +243,7 @@ struct draw_context struct tgsi_sampler **samplers; } gs; + /** Stream output (vertex feedback) state */ struct { struct pipe_stream_output_state state; void *buffers[PIPE_MAX_SO_BUFFERS]; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 121dfc414a..5b16c3788e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -176,6 +176,7 @@ static void emit(struct pt_emit *emit, static void draw_vertex_shader_run(struct draw_vertex_shader *vshader, const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS], const struct draw_vertex_info *input_verts, struct draw_vertex_info *output_verts ) { @@ -190,6 +191,7 @@ static void draw_vertex_shader_run(struct draw_vertex_shader *vshader, (const float (*)[4])input_verts->verts->data, ( float (*)[4])output_verts->verts->data, constants, + const_size, input_verts->count, input_verts->vertex_size, input_verts->vertex_size); @@ -236,6 +238,7 @@ static void fetch_pipeline_generic( struct draw_pt_middle_end *middle, if (fpme->opt & PT_SHADE) { draw_vertex_shader_run(vshader, draw->pt.user.vs_constants, + draw->pt.user.vs_constants_size, vert_info, &vs_vert_info); @@ -246,6 +249,7 @@ static void fetch_pipeline_generic( struct draw_pt_middle_end *middle, if ((fpme->opt & PT_SHADE) && gshader) { draw_geometry_shader_run(gshader, draw->pt.user.gs_constants, + draw->pt.user.gs_constants_size, vert_info, prim_info, &gs_vert_info, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index 
5c9db12086..4b99bee86a 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -254,6 +254,7 @@ llvm_pipeline_generic( struct draw_pt_middle_end *middle, if ((opt & PT_SHADE) && gshader) { draw_geometry_shader_run(gshader, draw->pt.user.gs_constants, + draw->pt.user.gs_constants_size, vert_info, prim_info, &gs_vert_info, diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 57ea63fc06..fb665b08ff 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -48,18 +48,30 @@ DEBUG_GET_ONCE_BOOL_OPTION(gallium_dump_vs, "GALLIUM_DUMP_VS", FALSE) + +/** + * Set a vertex shader constant buffer. + * \param slot which constant buffer in [0, PIPE_MAX_CONSTANT_BUFFERS-1] + * \param constants the mapped buffer + * \param size size of buffer in bytes + */ void draw_vs_set_constants(struct draw_context *draw, unsigned slot, const void *constants, unsigned size) { - if (((uintptr_t)constants) & 0xf) { + const int alignment = 16; + + /* check if buffer is 16-byte aligned */ + if (((uintptr_t)constants) & (alignment - 1)) { + /* if not, copy the constants into a new, 16-byte aligned buffer */ if (size > draw->vs.const_storage_size[slot]) { if (draw->vs.aligned_constant_storage[slot]) { align_free((void *)draw->vs.aligned_constant_storage[slot]); } - draw->vs.aligned_constant_storage[slot] = align_malloc(size, 16); + draw->vs.aligned_constant_storage[slot] = + align_malloc(size, alignment); } assert(constants); memcpy((void *)draw->vs.aligned_constant_storage[slot], diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index a731994523..f9a038788f 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -133,7 +133,8 @@ struct draw_vertex_shader { void (*run_linear)( struct draw_vertex_shader *shader, const float (*input)[4], float 
(*output)[4], - const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride ); diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index bc34d390da..dab3eb1ca8 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -85,7 +85,8 @@ static void vs_exec_run_linear( struct draw_vertex_shader *shader, const float (*input)[4], float (*output)[4], - const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride ) @@ -95,9 +96,8 @@ vs_exec_run_linear( struct draw_vertex_shader *shader, unsigned int i, j; unsigned slot; - for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { - machine->Consts[i] = constants[i]; - } + tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS, + constants, const_size); for (i = 0; i < count; i += MAX_TGSI_VERTICES) { unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 6c13df7913..d13ad24fff 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -49,6 +49,7 @@ vs_llvm_run_linear( struct draw_vertex_shader *shader, const float (*input)[4], float (*output)[4], const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride ) diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 6eb26927f2..eacd160187 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c 
@@ -149,7 +149,8 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, vsvg->base.vs->run_linear( vsvg->base.vs, temp_buffer, temp_buffer, - vsvg->base.vs->draw->pt.user.vs_constants, + vsvg->base.vs->draw->pt.user.vs_constants, + vsvg->base.vs->draw->pt.user.vs_constants_size, count, temp_vertex_stride, temp_vertex_stride); @@ -214,7 +215,8 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, vsvg->base.vs->run_linear( vsvg->base.vs, temp_buffer, temp_buffer, - vsvg->base.vs->draw->pt.user.vs_constants, + vsvg->base.vs->draw->pt.user.vs_constants, + vsvg->base.vs->draw->pt.user.vs_constants_size, count, temp_vertex_stride, temp_vertex_stride); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 5275faa5e2..6fcbf4f212 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -557,6 +557,23 @@ print_temp(const struct tgsi_exec_machine *mach, uint index) #endif +void +tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, + unsigned num_bufs, + const void **bufs, + const unsigned *buf_sizes) +{ + unsigned i; + + for (i = 0; i < num_bufs; i++) { + mach->Consts[i] = bufs[i]; + mach->ConstsSize[i] = buf_sizes[i]; + } +} + + + + /** * Check if there's a potential src/dst register data dependency when * using SOA execution. 
@@ -632,6 +649,11 @@ tgsi_exec_machine_bind_shader( util_init_math(); + if (numSamplers) { + assert(samplers); + assert(samplers[0]); + } + mach->Tokens = tokens; mach->Samplers = samplers; @@ -1040,6 +1062,8 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, { uint i; + assert(swizzle < 4); + switch (file) { case TGSI_FILE_CONSTANT: for (i = 0; i < QUAD_SIZE; i++) { @@ -1049,9 +1073,23 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, if (index->i[i] < 0) { chan->u[i] = 0; } else { - const uint *p = (const uint *)mach->Consts[index2D->i[i]]; - - chan->u[i] = p[index->i[i] * 4 + swizzle]; + /* NOTE: copying the const value as a uint instead of float */ + const uint constbuf = index2D->i[i]; + const uint *buf = (const uint *)mach->Consts[constbuf]; + const int pos = index->i[i] * 4 + swizzle; + /* const buffer bounds check */ + if (pos < 0 || pos >= mach->ConstsSize[constbuf]) { + if (0) { + /* Debug: print warning */ + static int count = 0; + if (count++ < 100) + debug_printf("TGSI Exec: const buffer index %d" + " out of bounds\n", pos); + } + chan->u[i] = 0; + } + else + chan->u[i] = buf[pos]; } } break; @@ -1065,9 +1103,10 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i], index2D->i[i], index->i[i]); }*/ - chan->u[i] = mach->Inputs[index2D->i[i] * - TGSI_EXEC_MAX_INPUT_ATTRIBS + - index->i[i]].xyzw[swizzle].u[i]; + int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]; + assert(pos >= 0); + assert(pos < Elements(mach->Inputs)); + chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i]; } break; @@ -1187,7 +1226,7 @@ fetch_source(const struct tgsi_exec_machine *mach, index2.i[1] = index2.i[2] = index2.i[3] = reg->Indirect.Index; - + assert(reg->Indirect.File == TGSI_FILE_ADDRESS); /* get current value of address register[swizzle] */ swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); fetch_src_file_channel(mach, diff --git 
a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index ccf80ca6fd..6dee362d58 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -253,7 +253,10 @@ struct tgsi_exec_machine struct tgsi_sampler **Samplers; unsigned ImmLimit; + const void *Consts[PIPE_MAX_CONSTANT_BUFFERS]; + unsigned ConstsSize[PIPE_MAX_CONSTANT_BUFFERS]; + const struct tgsi_token *Tokens; /**< Declarations, instructions */ unsigned Processor; /**< TGSI_PROCESSOR_x */ @@ -367,6 +370,13 @@ tgsi_set_exec_mask(struct tgsi_exec_machine *mach, } +extern void +tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, + unsigned num_bufs, + const void **bufs, + const unsigned *buf_sizes); + + #if defined __cplusplus } /* extern "C" */ #endif diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index c5f53cfa61..9361a3df09 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -112,6 +112,7 @@ struct softpipe_context { /** Mapped constant buffers */ const void *mapped_constants[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS]; + unsigned const_buffer_size[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS]; /** Vertex format */ struct vertex_info vertex_info; diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index d240bcbf3b..90f4787d59 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -111,9 +111,10 @@ shade_quads(struct quad_stage *qs, struct tgsi_exec_machine *machine = softpipe->fs_machine; unsigned i, nr_quads = 0; - for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { - machine->Consts[i] = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT][i]; - } + tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS, + softpipe->mapped_constants[PIPE_SHADER_FRAGMENT], + softpipe->const_buffer_size[PIPE_SHADER_FRAGMENT]); + 
machine->InterpCoefs = quads[0]->coef; for (i = 0; i < nr; i++) { diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 3fbf1f2578..ded242d3dc 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -195,6 +195,8 @@ softpipe_set_constant_buffer(struct pipe_context *pipe, } softpipe->mapped_constants[shader][index] = data; + softpipe->const_buffer_size[shader][index] = size; + softpipe->dirty |= SP_NEW_CONSTANTS; } -- cgit v1.2.3 From d88b6e19c14900f4cad94cf7a28d159369463108 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Jul 2010 14:09:08 -0600 Subject: draw: assorted clean-ups in clipper code --- src/gallium/auxiliary/draw/draw_pipe_clip.c | 40 ++++++++++++++--------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index 1cf6ee7a7f..8a3d499feb 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -68,8 +68,7 @@ struct clip_stage { }; -/* This is a bit confusing: - */ +/** Cast wrapper */ static INLINE struct clip_stage *clip_stage( struct draw_stage *stage ) { return (struct clip_stage *)stage; @@ -81,18 +80,22 @@ static INLINE struct clip_stage *clip_stage( struct draw_stage *stage ) /* All attributes are float[4], so this is easy: */ -static void interp_attr( float *fdst, +static void interp_attr( float dst[4], float t, - const float *fin, - const float *fout ) + const float in[4], + const float out[4] ) { - fdst[0] = LINTERP( t, fout[0], fin[0] ); - fdst[1] = LINTERP( t, fout[1], fin[1] ); - fdst[2] = LINTERP( t, fout[2], fin[2] ); - fdst[3] = LINTERP( t, fout[3], fin[3] ); + dst[0] = LINTERP( t, out[0], in[0] ); + dst[1] = LINTERP( t, out[1], in[1] ); + dst[2] = LINTERP( t, out[2], in[2] ); + dst[3] = LINTERP( t, out[3], in[3] ); } +/** + * Copy front/back, 
primary/secondary colors from src vertex to dst vertex. + * Used when flat shading. + */ static void copy_colors( struct draw_stage *stage, struct vertex_header *dst, const struct vertex_header *src ) @@ -121,20 +124,17 @@ static void interp( const struct clip_stage *clip, /* Vertex header. */ - { - dst->clipmask = 0; - dst->edgeflag = 0; /* will get overwritten later */ - dst->pad = 0; - dst->vertex_id = UNDEFINED_VERTEX_ID; - } + dst->clipmask = 0; + dst->edgeflag = 0; /* will get overwritten later */ + dst->pad = 0; + dst->vertex_id = UNDEFINED_VERTEX_ID; - /* Clip coordinates: interpolate normally + /* Interpolate the clip-space coords. */ - { - interp_attr(dst->clip, t, in->clip, out->clip); - } + interp_attr(dst->clip, t, in->clip, out->clip); - /* Do the projective divide and insert window coordinates: + /* Do the projective divide and viewport transformation to get + * new window coordinates: */ { const float *pos = dst->clip; -- cgit v1.2.3 From b4c8de1ff24d4d5e2fe550da54249934320acab4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Jul 2010 17:24:20 -0600 Subject: draw: do bounds checking of array elements (debug only) Make sure that all the element indexes actually lie inside the vertex buffer. Also, rename pipe_run() to pipe_run_elts() to be more specific. And assert/check the vertex count for the non-indexed case. 
--- src/gallium/auxiliary/draw/draw_pipe.c | 35 +++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 8cd75ecf9a..144f10a5d0 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -220,7 +220,7 @@ static void do_triangle( struct draw_context *draw, do_point( draw, \ verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK) ) -#define FUNC pipe_run +#define FUNC pipe_run_elts #define ARGS \ struct draw_context *draw, \ unsigned prim, \ @@ -269,14 +269,29 @@ void draw_pipeline_run( struct draw_context *draw, i < prim_info->primitive_count; start += prim_info->primitive_lengths[i], i++) { - unsigned count = prim_info->primitive_lengths[i]; - - pipe_run(draw, - prim_info->prim, - vert_info->verts, - vert_info->stride, - prim_info->elts + start, - count); + const unsigned count = prim_info->primitive_lengths[i]; + +#if DEBUG + /* make sure none of the element indexes go outside the vertex buffer */ + { + unsigned max_index = 0x0, i; + /* find the largest element index */ + for (i = 0; i < count; i++) { + unsigned int index = (prim_info->elts[start + i] + & ~DRAW_PIPE_FLAG_MASK); + if (index > max_index) + max_index = index; + } + assert(max_index <= vert_info->count); + } +#endif + + pipe_run_elts(draw, + prim_info->prim, + vert_info->verts, + vert_info->stride, + prim_info->elts + start, + count); } draw->pipeline.verts = NULL; @@ -378,6 +393,8 @@ void draw_pipeline_run_linear( struct draw_context *draw, draw->pipeline.vertex_stride = vert_info->stride; draw->pipeline.vertex_count = count; + assert(count <= vert_info->count); + pipe_run_linear(draw, prim_info->prim, (struct vertex_header*)verts, -- cgit v1.2.3 From 6c1625cc405f0d77523c122cedf3e8003f2aa7bf Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Jul 2010 17:31:17 -0600 Subject: llvmpipe: added some jit debug code 
If we crash in the jitted function we can examine jit_line and jit_state in gdb to learn more about the shader. --- src/gallium/drivers/llvmpipe/lp_rast.c | 10 ++++++++++ src/gallium/drivers/llvmpipe/lp_rast_priv.h | 30 +++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index ba7b48328b..3215d0f652 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -43,6 +43,12 @@ #include "lp_scene.h" +#ifdef DEBUG +int jit_line = 0; +const struct lp_rast_state *jit_state = NULL; +#endif + + /** * Begin rasterizing a scene. * Called once per scene by one thread. @@ -419,6 +425,7 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, depth = lp_rast_get_depth_block_pointer(task, tile_x + x, tile_y + y); /* run shader on 4x4 block */ + BEGIN_JIT_CALL(state); variant->jit_function[RAST_WHOLE]( &state->jit_context, tile_x + x, tile_y + y, inputs->facing, @@ -429,6 +436,7 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, depth, 0xffff, &task->vis_counter); + END_JIT_CALL(); } } } @@ -498,6 +506,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, assert(lp_check_alignment(state->jit_context.blend_color, 16)); /* run shader on 4x4 block */ + BEGIN_JIT_CALL(state); variant->jit_function[RAST_EDGE_TEST](&state->jit_context, x, y, inputs->facing, @@ -508,6 +517,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, depth, mask, &task->vis_counter); + END_JIT_CALL(); } diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index b4a48cfd02..760ab3db1f 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -40,6 +40,34 @@ #include "lp_limits.h" +/* If we crash in a jitted function, we can examine jit_line and jit_state + * to get some info. This is not thread-safe, however. 
+ */ +#ifdef DEBUG + +extern int jit_line; +extern const struct lp_rast_state *jit_state; + +#define BEGIN_JIT_CALL(state) \ + do { \ + jit_line = __LINE__; \ + jit_state = state; \ + } while (0) + +#define END_JIT_CALL() \ + do { \ + jit_line = 0; \ + jit_state = NULL; \ + } while (0) + +#else + +#define BEGIN_JIT_CALL(X) +#define END_JIT_CALL + +#endif + + struct lp_rasterizer; @@ -249,6 +277,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, depth = lp_rast_get_depth_block_pointer(task, x, y); /* run shader on 4x4 block */ + BEGIN_JIT_CALL(state); variant->jit_function[RAST_WHOLE]( &state->jit_context, x, y, inputs->facing, @@ -259,6 +288,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, depth, 0xffff, &task->vis_counter ); + END_JIT_CALL(); } -- cgit v1.2.3 From e75da1a8d668aa2a245a7c2690b42ae7a6038f48 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Jul 2010 17:49:49 -0600 Subject: llvmpipe: fix on-debug build breakage --- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 760ab3db1f..fae7f6d3dc 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -63,7 +63,7 @@ extern const struct lp_rast_state *jit_state; #else #define BEGIN_JIT_CALL(X) -#define END_JIT_CALL +#define END_JIT_CALL() #endif -- cgit v1.2.3 From f623d06c495c671f687d4a70c9281c64e5875232 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 30 Jul 2010 10:47:20 +0100 Subject: util: more helpers for old draw code --- src/gallium/auxiliary/util/u_draw.h | 140 +++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_draw_quad.h | 26 +----- 2 files changed, 141 insertions(+), 25 deletions(-) create mode 100644 src/gallium/auxiliary/util/u_draw.h (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_draw.h 
b/src/gallium/auxiliary/util/u_draw.h new file mode 100644 index 0000000000..b901f97008 --- /dev/null +++ b/src/gallium/auxiliary/util/u_draw.h @@ -0,0 +1,140 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef U_DRAW_H +#define U_DRAW_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_context.h" + + +static INLINE void +util_draw_init_info(struct pipe_draw_info *info) +{ + memset(info, 0, sizeof(*info)); + info->instance_count = 1; + info->max_index = 0xffffffff; +} + + +static INLINE void +util_draw_arrays(struct pipe_context *pipe, uint mode, uint start, uint count) +{ + struct pipe_draw_info info; + + util_draw_init_info(&info); + info.mode = mode; + info.start = start; + info.count = count; + info.min_index = start; + info.max_index = start + count - 1; + + pipe->draw_vbo(pipe, &info); +} + +static INLINE void +util_draw_elements(struct pipe_context *pipe, int index_bias, + uint mode, uint start, uint count) +{ + struct pipe_draw_info info; + + util_draw_init_info(&info); + info.indexed = TRUE; + info.mode = mode; + info.start = start; + info.count = count; + info.index_bias = index_bias; + + pipe->draw_vbo(pipe, &info); +} + +static INLINE void +util_draw_arrays_instanced(struct pipe_context *pipe, + uint mode, uint start, uint count, + uint start_instance, + uint instance_count) +{ + struct pipe_draw_info info; + + util_draw_init_info(&info); + info.mode = mode; + info.start = start; + info.count = count; + info.start_instance = start_instance; + info.instance_count = instance_count; + info.min_index = start; + info.max_index = start + count - 1; + + pipe->draw_vbo(pipe, &info); +} + +static INLINE void +util_draw_elements_instanced(struct pipe_context *pipe, + int index_bias, + uint mode, uint start, uint count, + uint start_instance, + uint instance_count) +{ + struct pipe_draw_info info; + + util_draw_init_info(&info); + info.indexed = TRUE; + info.mode = mode; + info.start = start; + info.count = count; + info.index_bias = index_bias; + info.start_instance = start_instance; + info.instance_count = instance_count; + info.min_index = start; + info.max_index = start + 
count - 1; + + pipe->draw_vbo(pipe, &info); +} + +static INLINE void +util_draw_range_elements(struct pipe_context *pipe, + int index_bias, + uint min_index, + uint max_index, + uint mode, uint start, uint count) +{ + struct pipe_draw_info info; + + util_draw_init_info(&info); + info.indexed = TRUE; + info.mode = mode; + info.start = start; + info.count = count; + info.index_bias = index_bias; + info.min_index = min_index; + info.max_index = max_index; + + pipe->draw_vbo(pipe, &info); +} + +#endif diff --git a/src/gallium/auxiliary/util/u_draw_quad.h b/src/gallium/auxiliary/util/u_draw_quad.h index 1c9f752611..52994fe05c 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.h +++ b/src/gallium/auxiliary/util/u_draw_quad.h @@ -39,31 +39,7 @@ extern "C" { struct pipe_resource; - -static INLINE void -util_draw_init_info(struct pipe_draw_info *info) -{ - memset(info, 0, sizeof(*info)); - info->instance_count = 1; - info->max_index = 0xffffffff; -} - - -static INLINE void -util_draw_arrays(struct pipe_context *pipe, uint mode, uint start, uint count) -{ - struct pipe_draw_info info; - - util_draw_init_info(&info); - info.mode = mode; - info.start = start; - info.count = count; - info.min_index = start; - info.max_index = start + count - 1; - - pipe->draw_vbo(pipe, &info); -} - +#include "util/u_draw.h" extern void util_draw_vertex_buffer(struct pipe_context *pipe, -- cgit v1.2.3 From 6c288d06ac512be6eb7f19a9005389dd46d5a26a Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 30 Jul 2010 11:10:24 -0400 Subject: r600g: fix typo in tex instruction + shader semantic id fix It seems we never get semantic id from TGSI so fallback to use output number as id. 
Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_shader.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index c61cc11e88..dd0e039bf6 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -138,12 +138,10 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta for (i = 0; i < 10; i++) { state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0; } - for (i = 0, j = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name != TGSI_SEMANTIC_POSITION) { - tmp = rshader->output[i].sid << ((j & 3) * 8); - state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + j / 4] |= tmp; - j++; - } + /* so far never got proper semantic id from tgsi */ + for (i = 0; i < 32; i++) { + tmp = i << ((i & 3) * 8); + state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp; } state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr); @@ -167,7 +165,7 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta if (state == NULL) return -ENOMEM; for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(rshader->input[i].sid); + tmp = S_028644_SEMANTIC(i); tmp |= S_028644_SEL_CENTROID(1); if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) { @@ -525,6 +523,7 @@ static int tgsi_op2(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; + alu.dst.chan = i; } else { alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { @@ -567,6 +566,7 @@ static int tgsi_slt(struct r600_shader_ctx *ctx) memset(&alu, 0, 
sizeof(struct r600_bc_alu)); if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; + alu.dst.chan = i; } else { alu.inst = ctx->inst_info->r600_opcode; r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]); @@ -747,6 +747,7 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru memset(&alu, 0, sizeof(struct r600_bc_alu)); if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; + alu.dst.chan = i; } else { alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -918,7 +919,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; tex.sampler_id = tex.resource_id; tex.src_gpr = src_gpr; - tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Src[0].Register.Index; + tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; tex.dst_sel_x = 0; tex.dst_sel_y = 1; tex.dst_sel_z = 2; -- cgit v1.2.3 From 042018a943a403a4d9887b400deb3b3c83ee40c0 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 27 Jul 2010 12:26:54 -0400 Subject: llvmpipe: delete function bodies after generating machine code --- src/gallium/auxiliary/draw/draw_llvm.c | 2 ++ src/gallium/auxiliary/gallivm/lp_bld_init.h | 2 ++ src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 8 ++++++++ src/gallium/drivers/llvmpipe/lp_state_fs.c | 1 + 4 files changed, 13 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 48489e5f6f..8022b720b3 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -826,6 +826,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) if (gallivm_debug & GALLIVM_DEBUG_ASM) { lp_disassemble(code); } + lp_func_delete_body(variant->function); } @@ -1001,6 
+1002,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian if (gallivm_debug & GALLIVM_DEBUG_ASM) { lp_disassemble(code); } + lp_func_delete_body(variant->function_elts); } void diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h index a32ced9b4c..f26fdac466 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h @@ -44,5 +44,7 @@ extern LLVMPassManagerRef lp_build_pass; void lp_build_init(void); +extern void +lp_func_delete_body(LLVMValueRef func); #endif /* !LP_BLD_INIT_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 072408b268..6d5410d970 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -166,3 +166,11 @@ lp_set_target_options(void) } #endif } + + +extern "C" void +lp_func_delete_body(LLVMValueRef FF) +{ + llvm::Function *func = llvm::unwrap(FF); + func->deleteBody(); +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index dbcc286417..5ee5bde184 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -715,6 +715,7 @@ generate_fragment(struct llvmpipe_context *lp, if (gallivm_debug & GALLIVM_DEBUG_ASM) { lp_disassemble(f); } + lp_func_delete_body(function); } } -- cgit v1.2.3 From 0e7d7d3051d125621b2b79c11f577fc3a3cc7b2c Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 30 Jul 2010 23:44:16 +0800 Subject: util: Fix the range of util_draw_elements_instanced. Keep min_index and max_index at their defaults (0 and ~0). 
--- src/gallium/auxiliary/util/u_draw.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_draw.h b/src/gallium/auxiliary/util/u_draw.h index b901f97008..2a91ea0f9a 100644 --- a/src/gallium/auxiliary/util/u_draw.h +++ b/src/gallium/auxiliary/util/u_draw.h @@ -110,8 +110,6 @@ util_draw_elements_instanced(struct pipe_context *pipe, info.index_bias = index_bias; info.start_instance = start_instance; info.instance_count = instance_count; - info.min_index = start; - info.max_index = start + count - 1; pipe->draw_vbo(pipe, &info); } -- cgit v1.2.3 From 078eff659a7ef90691966d983f35ed9e4ce63901 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Fri, 30 Jul 2010 11:48:48 -0700 Subject: llvmpipe: Fix implicit declaration of lp_func_delete_body warnings. --- src/gallium/auxiliary/draw/draw_llvm.c | 1 + src/gallium/drivers/llvmpipe/lp_state_fs.c | 1 + 2 files changed, 2 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 8022b720b3..de99b00a81 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -38,6 +38,7 @@ #include "gallivm/lp_bld_tgsi.h" #include "gallivm/lp_bld_printf.h" #include "gallivm/lp_bld_intr.h" +#include "gallivm/lp_bld_init.h" #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_dump.h" diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 5ee5bde184..dbca49a2ef 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -75,6 +75,7 @@ #include "gallivm/lp_bld_type.h" #include "gallivm/lp_bld_const.h" #include "gallivm/lp_bld_conv.h" +#include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_intr.h" #include "gallivm/lp_bld_logic.h" #include "gallivm/lp_bld_tgsi.h" -- cgit v1.2.3 From ce1fed1659ee0af063c6bace8a1aba879b85e775 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 30 
Jul 2010 13:28:42 -0600 Subject: tgsi: remove incorrect assertion --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 6fcbf4f212..298f3d0a8b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -651,7 +651,6 @@ tgsi_exec_machine_bind_shader( if (numSamplers) { assert(samplers); - assert(samplers[0]); } mach->Tokens = tokens; -- cgit v1.2.3 From dd406cf34196a5a60362d8e1928b1308b56dd3f8 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 30 Jul 2010 15:42:06 -0400 Subject: draw: actually a noop, rather than not implemented we just have nothing to do in it right now --- src/gallium/auxiliary/draw/draw_gs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index a36321d910..cff859a42b 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -75,7 +75,10 @@ draw_gs_set_constants(struct draw_context *draw, const void *constants, unsigned size) { - debug_printf("draw_gs_set_constants() not implemented yet!\n"); + /* noop. added here for symmetry with the VS + * code and in case we'll ever want to allign + * the constants, e.g. when we'll change to a + * different interpreter */ } -- cgit v1.2.3 From 79ab5b9798c911e7612e26616df82e98372549bf Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Fri, 30 Jul 2010 13:11:14 -0700 Subject: llvmpipe: Silence unused value warning. 
--- src/gallium/drivers/llvmpipe/lp_test_printf.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium') diff --git a/src/gallium/drivers/llvmpipe/lp_test_printf.c b/src/gallium/drivers/llvmpipe/lp_test_printf.c index a3447bf53f..4653f30e39 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_printf.c +++ b/src/gallium/drivers/llvmpipe/lp_test_printf.c @@ -120,6 +120,7 @@ test_printf(unsigned verbose, FILE *fp, const struct printf_test_case *testcase) abort(); } #else + (void) provider; engine = lp_build_engine; #endif -- cgit v1.2.3 From 2e4ad14a525f4028f0e0a93de2f8db785df33fb7 Mon Sep 17 00:00:00 2001 From: George Sapountzis Date: Thu, 8 Apr 2010 18:58:35 +0300 Subject: st/dri: mv driDriverAPI to backends --- src/gallium/state_trackers/dri/common/dri_screen.c | 44 +--------------------- src/gallium/state_trackers/dri/common/dri_screen.h | 3 ++ src/gallium/state_trackers/dri/drm/dri2.c | 29 ++++++++++++-- src/gallium/state_trackers/dri/drm/dri2.h | 37 ------------------ src/gallium/state_trackers/dri/sw/drisw.c | 23 +++++++++-- src/gallium/state_trackers/dri/sw/drisw.h | 43 --------------------- 6 files changed, 49 insertions(+), 130 deletions(-) delete mode 100644 src/gallium/state_trackers/dri/drm/dri2.h delete mode 100644 src/gallium/state_trackers/dri/sw/drisw.h (limited to 'src/gallium') diff --git a/src/gallium/state_trackers/dri/common/dri_screen.c b/src/gallium/state_trackers/dri/common/dri_screen.c index 25cad8d46c..a2bccefd6c 100644 --- a/src/gallium/state_trackers/dri/common/dri_screen.c +++ b/src/gallium/state_trackers/dri/common/dri_screen.c @@ -30,20 +30,12 @@ */ #include "utils.h" -#ifndef __NOT_HAVE_DRM_H -#include "vblank.h" -#endif #include "xmlpool.h" #include "dri_screen.h" #include "dri_context.h" #include "dri_drawable.h" #include "dri1_helper.h" -#ifndef __NOT_HAVE_DRM_H -#include "dri2.h" -#else -#include "drisw.h" -#endif #include "util/u_inlines.h" #include "pipe/p_screen.h" @@ -366,7 +358,7 @@ dri_destroy_screen_helper(struct 
dri_screen * screen) dri_destroy_option_cache(screen); } -static void +void dri_destroy_screen(__DRIscreen * sPriv) { struct dri_screen *screen = dri_screen(sPriv); @@ -402,38 +394,4 @@ dri_init_screen_helper(struct dri_screen *screen, return dri_fill_in_modes(screen, pixel_bits); } -/** - * DRI driver virtual function table. - * - * DRI versions differ in their implementation of init_screen and swap_buffers. - */ -const struct __DriverAPIRec driDriverAPI = { - .DestroyScreen = dri_destroy_screen, - .CreateContext = dri_create_context, - .DestroyContext = dri_destroy_context, - .CreateBuffer = dri_create_buffer, - .DestroyBuffer = dri_destroy_buffer, - .MakeCurrent = dri_make_current, - .UnbindContext = dri_unbind_context, - -#ifndef __NOT_HAVE_DRM_H - - .GetSwapInfo = NULL, - .GetDrawableMSC = NULL, - .WaitForMSC = NULL, - .InitScreen2 = dri2_init_screen, - - .InitScreen = NULL, - .SwapBuffers = NULL, - .CopySubBuffer = NULL, - -#else - - .InitScreen = drisw_init_screen, - .SwapBuffers = drisw_swap_buffers, - -#endif - -}; - /* vim: set sw=3 ts=8 sts=3 expandtab: */ diff --git a/src/gallium/state_trackers/dri/common/dri_screen.h b/src/gallium/state_trackers/dri/common/dri_screen.h index 087ae8d2a4..cfbebb3341 100644 --- a/src/gallium/state_trackers/dri/common/dri_screen.h +++ b/src/gallium/state_trackers/dri/common/dri_screen.h @@ -132,6 +132,9 @@ dri_init_screen_helper(struct dri_screen *screen, void dri_destroy_screen_helper(struct dri_screen * screen); +void +dri_destroy_screen(__DRIscreen * sPriv); + #endif /* vim: set sw=3 ts=8 sts=3 expandtab: */ diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c index 5c6573fa69..9965d706c8 100644 --- a/src/gallium/state_trackers/dri/drm/dri2.c +++ b/src/gallium/state_trackers/dri/drm/dri2.c @@ -38,9 +38,6 @@ #include "dri_screen.h" #include "dri_context.h" #include "dri_drawable.h" -#include "dri2.h" - -#include "GL/internal/dri_interface.h" /** * DRI2 flush extension. 
@@ -497,7 +494,7 @@ static const __DRIextension *dri_screen_extensions[] = { * * Returns the __GLcontextModes supported by this driver. */ -const __DRIconfig ** +static const __DRIconfig ** dri2_init_screen(__DRIscreen * sPriv) { const __DRIconfig **configs; @@ -534,6 +531,30 @@ fail: return NULL; } +/** + * DRI driver virtual function table. + * + * DRI versions differ in their implementation of init_screen and swap_buffers. + */ +const struct __DriverAPIRec driDriverAPI = { + .DestroyScreen = dri_destroy_screen, + .CreateContext = dri_create_context, + .DestroyContext = dri_destroy_context, + .CreateBuffer = dri_create_buffer, + .DestroyBuffer = dri_destroy_buffer, + .MakeCurrent = dri_make_current, + .UnbindContext = dri_unbind_context, + + .GetSwapInfo = NULL, + .GetDrawableMSC = NULL, + .WaitForMSC = NULL, + .InitScreen2 = dri2_init_screen, + + .InitScreen = NULL, + .SwapBuffers = NULL, + .CopySubBuffer = NULL, +}; + /* This is the table of extensions that the loader will dlsym() for. */ PUBLIC const __DRIextension *__driDriverExtensions[] = { &driCoreExtension.base, diff --git a/src/gallium/state_trackers/dri/drm/dri2.h b/src/gallium/state_trackers/dri/drm/dri2.h deleted file mode 100644 index 07adfe4f6c..0000000000 --- a/src/gallium/state_trackers/dri/drm/dri2.h +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************** - * - * Copyright 2009, VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef DRI2_H -#define DRI2_H - -#include "dri_drawable.h" -#include "dri_wrapper.h" - -const __DRIconfig ** -dri2_init_screen(__DRIscreen * sPriv); - -#endif /* DRI2_H */ diff --git a/src/gallium/state_trackers/dri/sw/drisw.c b/src/gallium/state_trackers/dri/sw/drisw.c index 86297c3f80..9edade9dc9 100644 --- a/src/gallium/state_trackers/dri/sw/drisw.c +++ b/src/gallium/state_trackers/dri/sw/drisw.c @@ -44,7 +44,6 @@ #include "dri_context.h" #include "dri_drawable.h" #include "dri1_helper.h" -#include "drisw.h" DEBUG_GET_ONCE_BOOL_OPTION(swrast_no_present, "SWRAST_NO_PRESENT", FALSE); static boolean swrast_no_present = FALSE; @@ -133,7 +132,7 @@ drisw_copy_to_front(__DRIdrawable * dPriv, * Backend functions for st_framebuffer interface and swap_buffers. 
*/ -void +static void drisw_swap_buffers(__DRIdrawable *dPriv) { struct dri_context *ctx = dri_get_current(dPriv->driScreenPriv); @@ -250,7 +249,7 @@ static struct drisw_loader_funcs drisw_lf = { .put_image = drisw_put_image }; -const __DRIconfig ** +static const __DRIconfig ** drisw_init_screen(__DRIscreen * sPriv) { const __DRIconfig **configs; @@ -286,6 +285,24 @@ fail: return NULL; } +/** + * DRI driver virtual function table. + * + * DRI versions differ in their implementation of init_screen and swap_buffers. + */ +const struct __DriverAPIRec driDriverAPI = { + .DestroyScreen = dri_destroy_screen, + .CreateContext = dri_create_context, + .DestroyContext = dri_destroy_context, + .CreateBuffer = dri_create_buffer, + .DestroyBuffer = dri_destroy_buffer, + .MakeCurrent = dri_make_current, + .UnbindContext = dri_unbind_context, + + .InitScreen = drisw_init_screen, + .SwapBuffers = drisw_swap_buffers, +}; + /* This is the table of extensions that the loader will dlsym() for. */ PUBLIC const __DRIextension *__driDriverExtensions[] = { &driCoreExtension.base, diff --git a/src/gallium/state_trackers/dri/sw/drisw.h b/src/gallium/state_trackers/dri/sw/drisw.h deleted file mode 100644 index 6c6c891f35..0000000000 --- a/src/gallium/state_trackers/dri/sw/drisw.h +++ /dev/null @@ -1,43 +0,0 @@ -/************************************************************************** - * - * Copyright 2009, VMware, Inc. - * All Rights Reserved. 
- * Copyright 2010 George Sapountzis - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#ifndef DRISW_H -#define DRISW_H - -#include "dri_context.h" -#include "dri_drawable.h" - -#include "state_tracker/st_api.h" -#include "dri_wrapper.h" - -const __DRIconfig ** -drisw_init_screen(__DRIscreen * sPriv); - -void drisw_swap_buffers(__DRIdrawable * dPriv); - -#endif /* DRISW_H */ -- cgit v1.2.3 From a30b966f8345cb99922a416fce2da6edb70f864c Mon Sep 17 00:00:00 2001 From: George Sapountzis Date: Wed, 26 May 2010 01:08:15 +0300 Subject: st/dri: drop dri1_helper --- .../state_trackers/dri/common/dri1_helper.c | 129 --------------------- .../state_trackers/dri/common/dri1_helper.h | 61 ---------- .../state_trackers/dri/common/dri_drawable.c | 9 +- .../state_trackers/dri/common/dri_drawable.h | 14 +-- src/gallium/state_trackers/dri/common/dri_screen.c | 3 - src/gallium/state_trackers/dri/common/dri_screen.h | 5 +- src/gallium/state_trackers/dri/drm/Makefile | 1 - src/gallium/state_trackers/dri/drm/SConscript | 1 - src/gallium/state_trackers/dri/drm/dri1_helper.c | 1 - src/gallium/state_trackers/dri/sw/Makefile | 1 - src/gallium/state_trackers/dri/sw/SConscript | 1 - src/gallium/state_trackers/dri/sw/dri1_helper.c | 1 - src/gallium/state_trackers/dri/sw/drisw.c | 25 +++- 13 files changed, 23 insertions(+), 229 deletions(-) delete mode 100644 src/gallium/state_trackers/dri/common/dri1_helper.c delete mode 100644 src/gallium/state_trackers/dri/common/dri1_helper.h delete mode 120000 src/gallium/state_trackers/dri/drm/dri1_helper.c delete mode 120000 src/gallium/state_trackers/dri/sw/dri1_helper.c (limited to 'src/gallium') diff --git a/src/gallium/state_trackers/dri/common/dri1_helper.c b/src/gallium/state_trackers/dri/common/dri1_helper.c deleted file mode 100644 index ad6c7d3750..0000000000 --- a/src/gallium/state_trackers/dri/common/dri1_helper.c +++ /dev/null @@ -1,129 +0,0 @@ -/************************************************************************** - * - * Copyright 2009, 
VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ -/* - * Management of pipe objects (surface / pipe / fences) used by DRI1 and DRISW. 
- * - * Author: Keith Whitwell - * Author: Jakob Bornecrantz - */ - -#include "util/u_inlines.h" -#include "pipe/p_context.h" - -#include "dri_screen.h" -#include "dri_context.h" -#include "dri_drawable.h" -#include "dri1_helper.h" - -struct pipe_fence_handle * -dri1_swap_fences_pop_front(struct dri_drawable *draw) -{ - struct pipe_screen *screen = dri_screen(draw->sPriv)->base.screen; - struct pipe_fence_handle *fence = NULL; - - if (draw->cur_fences >= draw->desired_fences) { - screen->fence_reference(screen, &fence, draw->swap_fences[draw->tail]); - screen->fence_reference(screen, &draw->swap_fences[draw->tail++], NULL); - --draw->cur_fences; - draw->tail &= DRI_SWAP_FENCES_MASK; - } - return fence; -} - -void -dri1_swap_fences_push_back(struct dri_drawable *draw, - struct pipe_fence_handle *fence) -{ - struct pipe_screen *screen = dri_screen(draw->sPriv)->base.screen; - - if (!fence) - return; - - if (draw->cur_fences < DRI_SWAP_FENCES_MAX) { - draw->cur_fences++; - screen->fence_reference(screen, &draw->swap_fences[draw->head++], - fence); - draw->head &= DRI_SWAP_FENCES_MASK; - } -} - -void -dri1_swap_fences_clear(struct dri_drawable *drawable) -{ - struct pipe_screen *screen = dri_screen(drawable->sPriv)->base.screen; - struct pipe_fence_handle *fence; - - while (drawable->cur_fences) { - fence = dri1_swap_fences_pop_front(drawable); - screen->fence_reference(screen, &fence, NULL); - } -} - -struct pipe_surface * -dri1_get_pipe_surface(struct dri_drawable *drawable, struct pipe_resource *ptex) -{ - struct pipe_screen *pipe_screen = dri_screen(drawable->sPriv)->base.screen; - struct pipe_surface *psurf = drawable->dri1_surface; - - if (!psurf || psurf->texture != ptex) { - pipe_surface_reference(&drawable->dri1_surface, NULL); - - drawable->dri1_surface = pipe_screen->get_tex_surface(pipe_screen, - ptex, 0, 0, 0, 0/* no bind flag???*/); - - psurf = drawable->dri1_surface; - } - - return psurf; -} - -void -dri1_destroy_pipe_surface(struct dri_drawable 
*drawable) -{ - pipe_surface_reference(&drawable->dri1_surface, NULL); -} - -struct pipe_context * -dri1_get_pipe_context(struct dri_screen *screen) -{ - struct pipe_context *pipe = screen->dri1_pipe; - - if (!pipe) { - screen->dri1_pipe = - screen->base.screen->context_create(screen->base.screen, NULL); - pipe = screen->dri1_pipe; - } - - return pipe; -} - -void -dri1_destroy_pipe_context(struct dri_screen *screen) -{ - if (screen->dri1_pipe) - screen->dri1_pipe->destroy(screen->dri1_pipe); -} diff --git a/src/gallium/state_trackers/dri/common/dri1_helper.h b/src/gallium/state_trackers/dri/common/dri1_helper.h deleted file mode 100644 index c98adf2df2..0000000000 --- a/src/gallium/state_trackers/dri/common/dri1_helper.h +++ /dev/null @@ -1,61 +0,0 @@ -/************************************************************************** - * - * Copyright 2009, VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ -/* - * Author: Keith Whitwell - * Author: Jakob Bornecrantz - */ - -#ifndef DRI1_HELPER_H -#define DRI1_HELPER_H - -#include "dri_screen.h" -#include "dri_context.h" -#include "dri_drawable.h" - -struct pipe_fence_handle * -dri1_swap_fences_pop_front(struct dri_drawable *draw); - -void -dri1_swap_fences_push_back(struct dri_drawable *draw, - struct pipe_fence_handle *fence); - -void -dri1_swap_fences_clear(struct dri_drawable *drawable); - -struct pipe_surface * -dri1_get_pipe_surface(struct dri_drawable *drawable, struct pipe_resource *ptex); - -void -dri1_destroy_pipe_surface(struct dri_drawable *drawable); - -struct pipe_context * -dri1_get_pipe_context(struct dri_screen *screen); - -void -dri1_destroy_pipe_context(struct dri_screen *screen); - -#endif /* DRI1_HELPER_H */ diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.c b/src/gallium/state_trackers/dri/common/dri_drawable.c index 2bc0faffef..be824e7e3f 100644 --- a/src/gallium/state_trackers/dri/common/dri_drawable.c +++ b/src/gallium/state_trackers/dri/common/dri_drawable.c @@ -32,7 +32,6 @@ #include "dri_screen.h" #include "dri_context.h" #include "dri_drawable.h" -#include "dri1_helper.h" #include "pipe/p_screen.h" #include "util/u_format.h" @@ -138,8 +137,6 @@ dri_create_buffer(__DRIscreen * sPriv, drawable->dPriv = dPriv; dPriv->driverPrivate = (void *)drawable; - drawable->desired_fences = 2; - return GL_TRUE; fail: FREE(drawable); @@ -152,15 +149,11 @@ dri_destroy_buffer(__DRIdrawable * dPriv) struct dri_drawable *drawable = dri_drawable(dPriv); int i; - dri1_swap_fences_clear(drawable); - - dri1_destroy_pipe_surface(drawable); + 
pipe_surface_reference(&drawable->drisw_surface, NULL); for (i = 0; i < ST_ATTACHMENT_COUNT; i++) pipe_resource_reference(&drawable->textures[i], NULL); - drawable->desired_fences = 0; - FREE(drawable); } diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.h b/src/gallium/state_trackers/dri/common/dri_drawable.h index 5fd650ac88..62c7b0d41a 100644 --- a/src/gallium/state_trackers/dri/common/dri_drawable.h +++ b/src/gallium/state_trackers/dri/common/dri_drawable.h @@ -33,13 +33,9 @@ #include "state_tracker/st_api.h" struct pipe_surface; -struct pipe_fence_handle; struct st_framebuffer; struct dri_context; -#define DRI_SWAP_FENCES_MAX 8 -#define DRI_SWAP_FENCES_MASK 7 - struct dri_drawable { struct st_framebuffer_iface base; @@ -57,14 +53,8 @@ struct dri_drawable struct pipe_resource *textures[ST_ATTACHMENT_COUNT]; unsigned int texture_mask, texture_stamp; - struct pipe_fence_handle *swap_fences[DRI_SWAP_FENCES_MAX]; - unsigned int head; - unsigned int tail; - unsigned int desired_fences; - unsigned int cur_fences; - - /* used only by DRI1 */ - struct pipe_surface *dri1_surface; + /* used only by DRISW */ + struct pipe_surface *drisw_surface; }; static INLINE struct dri_drawable * diff --git a/src/gallium/state_trackers/dri/common/dri_screen.c b/src/gallium/state_trackers/dri/common/dri_screen.c index a2bccefd6c..ed302e37c1 100644 --- a/src/gallium/state_trackers/dri/common/dri_screen.c +++ b/src/gallium/state_trackers/dri/common/dri_screen.c @@ -35,7 +35,6 @@ #include "dri_screen.h" #include "dri_context.h" #include "dri_drawable.h" -#include "dri1_helper.h" #include "util/u_inlines.h" #include "pipe/p_screen.h" @@ -347,8 +346,6 @@ dri_destroy_option_cache(struct dri_screen * screen) void dri_destroy_screen_helper(struct dri_screen * screen) { - dri1_destroy_pipe_context(screen); - if (screen->st_api && screen->st_api->destroy) screen->st_api->destroy(screen->st_api); diff --git a/src/gallium/state_trackers/dri/common/dri_screen.h 
b/src/gallium/state_trackers/dri/common/dri_screen.h index cfbebb3341..e27ff9d98e 100644 --- a/src/gallium/state_trackers/dri/common/dri_screen.h +++ b/src/gallium/state_trackers/dri/common/dri_screen.h @@ -64,7 +64,7 @@ struct dri_screen int fd; drmLock *drmLock; - /* hooks filled in by dri1, dri2 & drisw */ + /* hooks filled in by dri2 & drisw */ __DRIimage * (*lookup_egl_image)(struct dri_context *ctx, void *handle); void (*allocate_textures)(struct dri_drawable *drawable, const enum st_attachment_type *statts, @@ -77,9 +77,6 @@ struct dri_screen boolean d_depth_bits_last; boolean sd_depth_bits_last; boolean auto_fake_front; - - /* used only by DRI1 */ - struct pipe_context *dri1_pipe; }; /** cast wrapper */ diff --git a/src/gallium/state_trackers/dri/drm/Makefile b/src/gallium/state_trackers/dri/drm/Makefile index 94fa61fec7..c717b2bdeb 100644 --- a/src/gallium/state_trackers/dri/drm/Makefile +++ b/src/gallium/state_trackers/dri/drm/Makefile @@ -17,7 +17,6 @@ C_SOURCES = \ dri_context.c \ dri_screen.c \ dri_drawable.c \ - dri1_helper.c \ dri2.c # $(TOP)/src/mesa/drivers/dri/common/utils.c \ diff --git a/src/gallium/state_trackers/dri/drm/SConscript b/src/gallium/state_trackers/dri/drm/SConscript index 0c279d2236..2a0af65f9b 100644 --- a/src/gallium/state_trackers/dri/drm/SConscript +++ b/src/gallium/state_trackers/dri/drm/SConscript @@ -21,7 +21,6 @@ if env['dri']: source = [ 'dri_context.c', 'dri_drawable.c', 'dri_screen.c', - 'dri1_helper.c', 'dri2.c', ] ) diff --git a/src/gallium/state_trackers/dri/drm/dri1_helper.c b/src/gallium/state_trackers/dri/drm/dri1_helper.c deleted file mode 120000 index c45ebf5c10..0000000000 --- a/src/gallium/state_trackers/dri/drm/dri1_helper.c +++ /dev/null @@ -1 +0,0 @@ -../common/dri1_helper.c \ No newline at end of file diff --git a/src/gallium/state_trackers/dri/sw/Makefile b/src/gallium/state_trackers/dri/sw/Makefile index a1dadeba5e..33bc0ed9c9 100644 --- a/src/gallium/state_trackers/dri/sw/Makefile +++ 
b/src/gallium/state_trackers/dri/sw/Makefile @@ -20,7 +20,6 @@ C_SOURCES = \ dri_context.c \ dri_screen.c \ dri_drawable.c \ - dri1_helper.c \ drisw.c include ../../../Makefile.template diff --git a/src/gallium/state_trackers/dri/sw/SConscript b/src/gallium/state_trackers/dri/sw/SConscript index 0c5194d6ed..d2eb66668e 100644 --- a/src/gallium/state_trackers/dri/sw/SConscript +++ b/src/gallium/state_trackers/dri/sw/SConscript @@ -21,7 +21,6 @@ if env['dri']: source = [ 'dri_context.c', 'dri_drawable.c', 'dri_screen.c', - 'dri1_helper.c', 'drisw.c', ] ) diff --git a/src/gallium/state_trackers/dri/sw/dri1_helper.c b/src/gallium/state_trackers/dri/sw/dri1_helper.c deleted file mode 120000 index c45ebf5c10..0000000000 --- a/src/gallium/state_trackers/dri/sw/dri1_helper.c +++ /dev/null @@ -1 +0,0 @@ -../common/dri1_helper.c \ No newline at end of file diff --git a/src/gallium/state_trackers/dri/sw/drisw.c b/src/gallium/state_trackers/dri/sw/drisw.c index 9edade9dc9..d2210098d1 100644 --- a/src/gallium/state_trackers/dri/sw/drisw.c +++ b/src/gallium/state_trackers/dri/sw/drisw.c @@ -43,7 +43,6 @@ #include "dri_screen.h" #include "dri_context.h" #include "dri_drawable.h" -#include "dri1_helper.h" DEBUG_GET_ONCE_BOOL_OPTION(swrast_no_present, "SWRAST_NO_PRESENT", FALSE); static boolean swrast_no_present = FALSE; @@ -88,6 +87,24 @@ drisw_put_image(struct dri_drawable *drawable, put_image(dPriv, data, width, height); } +static struct pipe_surface * +drisw_get_pipe_surface(struct dri_drawable *drawable, struct pipe_resource *ptex) +{ + struct pipe_screen *pipe_screen = dri_screen(drawable->sPriv)->base.screen; + struct pipe_surface *psurf = drawable->drisw_surface; + + if (!psurf || psurf->texture != ptex) { + pipe_surface_reference(&drawable->drisw_surface, NULL); + + drawable->drisw_surface = pipe_screen->get_tex_surface(pipe_screen, + ptex, 0, 0, 0, 0/* no bind flag???*/); + + psurf = drawable->drisw_surface; + } + + return psurf; +} + static INLINE void 
drisw_present_texture(__DRIdrawable *dPriv, struct pipe_resource *ptex) @@ -99,7 +116,7 @@ drisw_present_texture(__DRIdrawable *dPriv, if (swrast_no_present) return; - psurf = dri1_get_pipe_surface(drawable, ptex); + psurf = drisw_get_pipe_surface(drawable, ptex); if (!psurf) return; @@ -174,10 +191,6 @@ drisw_flush_frontbuffer(struct dri_drawable *drawable, * During fixed-size operation, the function keeps allocating new attachments * as they are requested. Unused attachments are not removed, not until the * framebuffer is resized or destroyed. - * - * It should be possible for DRI1 and DRISW to share this function, but it - * seems a better seperation and safer for each DRI version to provide its own - * function. */ static void drisw_allocate_textures(struct dri_drawable *drawable, -- cgit v1.2.3 From 873ddf547d5aeb68f37a172d73131c6bc51101f6 Mon Sep 17 00:00:00 2001 From: George Sapountzis Date: Sun, 18 Jul 2010 18:23:36 +0300 Subject: st/dri: move backend hooks to appropriate object --- .../state_trackers/dri/common/dri_context.h | 3 ++ .../state_trackers/dri/common/dri_drawable.c | 9 ++--- .../state_trackers/dri/common/dri_drawable.h | 10 +++++ src/gallium/state_trackers/dri/common/dri_screen.c | 5 +-- src/gallium/state_trackers/dri/common/dri_screen.h | 9 ----- src/gallium/state_trackers/dri/drm/dri2.c | 45 ++++++++++++++++++---- src/gallium/state_trackers/dri/sw/drisw.c | 26 ++++++++++--- 7 files changed, 78 insertions(+), 29 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/state_trackers/dri/common/dri_context.h b/src/gallium/state_trackers/dri/common/dri_context.h index b29e853383..692c49d7cd 100644 --- a/src/gallium/state_trackers/dri/common/dri_context.h +++ b/src/gallium/state_trackers/dri/common/dri_context.h @@ -60,6 +60,9 @@ struct dri_context /* gallium */ struct st_context_iface *st; + + /* hooks filled in by dri2 & drisw */ + __DRIimage * (*lookup_egl_image)(struct dri_context *ctx, void *handle); }; static INLINE struct 
dri_context * diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.c b/src/gallium/state_trackers/dri/common/dri_drawable.c index be824e7e3f..88b79a0a76 100644 --- a/src/gallium/state_trackers/dri/common/dri_drawable.c +++ b/src/gallium/state_trackers/dri/common/dri_drawable.c @@ -67,10 +67,10 @@ dri_st_framebuffer_validate(struct st_framebuffer_iface *stfbi, new_stamp = (drawable->texture_stamp != drawable->dPriv->lastStamp); if (new_stamp || new_mask || screen->broken_invalidate) { - if (new_stamp && screen->update_drawable_info) - screen->update_drawable_info(drawable); + if (new_stamp && drawable->update_drawable_info) + drawable->update_drawable_info(drawable); - screen->allocate_textures(drawable, statts, count); + drawable->allocate_textures(drawable, statts, count); /* add existing textures */ for (i = 0; i < ST_ATTACHMENT_COUNT; i++) { @@ -99,10 +99,9 @@ dri_st_framebuffer_flush_front(struct st_framebuffer_iface *stfbi, { struct dri_drawable *drawable = (struct dri_drawable *) stfbi->st_manager_private; - struct dri_screen *screen = dri_screen(drawable->sPriv); /* XXX remove this and just set the correct one on the framebuffer */ - screen->flush_frontbuffer(drawable, statt); + drawable->flush_frontbuffer(drawable, statt); return TRUE; } diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.h b/src/gallium/state_trackers/dri/common/dri_drawable.h index 62c7b0d41a..3f2e24fc15 100644 --- a/src/gallium/state_trackers/dri/common/dri_drawable.h +++ b/src/gallium/state_trackers/dri/common/dri_drawable.h @@ -55,6 +55,16 @@ struct dri_drawable /* used only by DRISW */ struct pipe_surface *drisw_surface; + + /* hooks filled in by dri2 & drisw */ + void (*allocate_textures)(struct dri_drawable *drawable, + const enum st_attachment_type *statts, + unsigned count); + + void (*update_drawable_info)(struct dri_drawable *drawable); + + void (*flush_frontbuffer)(struct dri_drawable *drawable, + enum st_attachment_type statt); }; static INLINE struct 
dri_drawable * diff --git a/src/gallium/state_trackers/dri/common/dri_screen.c b/src/gallium/state_trackers/dri/common/dri_screen.c index ed302e37c1..e869b4b9b1 100644 --- a/src/gallium/state_trackers/dri/common/dri_screen.c +++ b/src/gallium/state_trackers/dri/common/dri_screen.c @@ -294,11 +294,10 @@ dri_get_egl_image(struct st_manager *smapi, { struct dri_context *ctx = (struct dri_context *)stctxi->st_manager_private; - struct dri_screen *screen = dri_screen(ctx->sPriv); __DRIimage *img = NULL; - if (screen->lookup_egl_image) { - img = screen->lookup_egl_image(ctx, egl_image); + if (ctx->lookup_egl_image) { + img = ctx->lookup_egl_image(ctx, egl_image); } if (!img) diff --git a/src/gallium/state_trackers/dri/common/dri_screen.h b/src/gallium/state_trackers/dri/common/dri_screen.h index e27ff9d98e..53ccce145b 100644 --- a/src/gallium/state_trackers/dri/common/dri_screen.h +++ b/src/gallium/state_trackers/dri/common/dri_screen.h @@ -64,15 +64,6 @@ struct dri_screen int fd; drmLock *drmLock; - /* hooks filled in by dri2 & drisw */ - __DRIimage * (*lookup_egl_image)(struct dri_context *ctx, void *handle); - void (*allocate_textures)(struct dri_drawable *drawable, - const enum st_attachment_type *statts, - unsigned count); - void (*update_drawable_info)(struct dri_drawable *drawable); - void (*flush_frontbuffer)(struct dri_drawable *drawable, - enum st_attachment_type statt); - /* gallium */ boolean d_depth_bits_last; boolean sd_depth_bits_last; diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c index 9965d706c8..88ffa1e89d 100644 --- a/src/gallium/state_trackers/dri/drm/dri2.c +++ b/src/gallium/state_trackers/dri/drm/dri2.c @@ -507,9 +507,6 @@ dri2_init_screen(__DRIscreen * sPriv) screen->sPriv = sPriv; screen->fd = sPriv->fd; - screen->lookup_egl_image = dri2_lookup_egl_image; - screen->allocate_textures = dri2_allocate_textures; - screen->flush_frontbuffer = dri2_flush_frontbuffer; sPriv->private = (void *)screen; 
sPriv->extensions = dri_screen_extensions; @@ -531,16 +528,52 @@ fail: return NULL; } +static boolean +dri2_create_context(gl_api api, const __GLcontextModes * visual, + __DRIcontext * cPriv, void *sharedContextPrivate) +{ + struct dri_context *ctx = NULL; + + if (!dri_create_context(api, visual, cPriv, sharedContextPrivate)) + return FALSE; + + ctx = cPriv->driverPrivate; + + ctx->lookup_egl_image = dri2_lookup_egl_image; + + return TRUE; +} + +static boolean +dri2_create_buffer(__DRIscreen * sPriv, + __DRIdrawable * dPriv, + const __GLcontextModes * visual, boolean isPixmap) +{ + struct dri_drawable *drawable = NULL; + + if (!dri_create_buffer(sPriv, dPriv, visual, isPixmap)) + return FALSE; + + drawable = dPriv->driverPrivate; + + drawable->allocate_textures = dri2_allocate_textures; + drawable->flush_frontbuffer = dri2_flush_frontbuffer; + + return TRUE; +} + /** * DRI driver virtual function table. * * DRI versions differ in their implementation of init_screen and swap_buffers. */ const struct __DriverAPIRec driDriverAPI = { + .InitScreen = NULL, + .InitScreen2 = dri2_init_screen, .DestroyScreen = dri_destroy_screen, - .CreateContext = dri_create_context, + .CreateContext = dri2_create_context, .DestroyContext = dri_destroy_context, - .CreateBuffer = dri_create_buffer, + .CreateBuffer = dri2_create_buffer, .DestroyBuffer = dri_destroy_buffer, .MakeCurrent = dri_make_current, .UnbindContext = dri_unbind_context, @@ -548,9 +581,7 @@ const struct __DriverAPIRec driDriverAPI = { .GetSwapInfo = NULL, .GetDrawableMSC = NULL, .WaitForMSC = NULL, - .InitScreen2 = dri2_init_screen, - .InitScreen = NULL, .SwapBuffers = NULL, .CopySubBuffer = NULL, }; diff --git a/src/gallium/state_trackers/dri/sw/drisw.c b/src/gallium/state_trackers/dri/sw/drisw.c index d2210098d1..ae96f1b20e 100644 --- a/src/gallium/state_trackers/dri/sw/drisw.c +++ b/src/gallium/state_trackers/dri/sw/drisw.c @@ -275,9 +275,6 @@ drisw_init_screen(__DRIscreen * sPriv) screen->sPriv = sPriv; screen->fd = 
-1; - screen->allocate_textures = drisw_allocate_textures; - screen->update_drawable_info = drisw_update_drawable_info; - screen->flush_frontbuffer = drisw_flush_frontbuffer; swrast_no_present = debug_get_option_swrast_no_present(); @@ -298,21 +295,40 @@ fail: return NULL; } +static boolean +drisw_create_buffer(__DRIscreen * sPriv, + __DRIdrawable * dPriv, + const __GLcontextModes * visual, boolean isPixmap) +{ + struct dri_drawable *drawable = NULL; + + if (!dri_create_buffer(sPriv, dPriv, visual, isPixmap)) + return FALSE; + + drawable = dPriv->driverPrivate; + + drawable->allocate_textures = drisw_allocate_textures; + drawable->update_drawable_info = drisw_update_drawable_info; + drawable->flush_frontbuffer = drisw_flush_frontbuffer; + + return TRUE; +} + /** * DRI driver virtual function table. * * DRI versions differ in their implementation of init_screen and swap_buffers. */ const struct __DriverAPIRec driDriverAPI = { + .InitScreen = drisw_init_screen, .DestroyScreen = dri_destroy_screen, .CreateContext = dri_create_context, .DestroyContext = dri_destroy_context, - .CreateBuffer = dri_create_buffer, + .CreateBuffer = drisw_create_buffer, .DestroyBuffer = dri_destroy_buffer, .MakeCurrent = dri_make_current, .UnbindContext = dri_unbind_context, - .InitScreen = drisw_init_screen, .SwapBuffers = drisw_swap_buffers, }; -- cgit v1.2.3 From 094d66f45992830929d620782c70836b4b9b4a37 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 30 Jul 2010 13:58:46 -0400 Subject: r600g: add KIL opcode support Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_shader.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index dd0e039bf6..8159cefd94 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -556,6 +556,30 @@ static int tgsi_op2(struct r600_shader_ctx *ctx) 
return 0; } +static int tgsi_kill(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, r; + + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = ctx->inst_info->r600_opcode; + alu.dst.chan = i; + alu.src[0].sel = 248; + r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]); + if (r) + return r; + if (i == 3) { + alu.last = 1; + } + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + static int tgsi_slt(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -1139,7 +1163,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* conditional kill */ + {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ /* gap */ {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, -- cgit v1.2.3 From ca0b0899d8cc20f3e918839e044c55f9a19e9ffe Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Sat, 31 Jul 2010 00:41:43 -0700 Subject: st/dri: Remove unnecessary headers. 
--- src/gallium/state_trackers/dri/common/dri_drawable.c | 1 - src/gallium/state_trackers/dri/common/dri_screen.c | 1 - 2 files changed, 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.c b/src/gallium/state_trackers/dri/common/dri_drawable.c index 88b79a0a76..c67ca2224d 100644 --- a/src/gallium/state_trackers/dri/common/dri_drawable.c +++ b/src/gallium/state_trackers/dri/common/dri_drawable.c @@ -30,7 +30,6 @@ */ #include "dri_screen.h" -#include "dri_context.h" #include "dri_drawable.h" #include "pipe/p_screen.h" diff --git a/src/gallium/state_trackers/dri/common/dri_screen.c b/src/gallium/state_trackers/dri/common/dri_screen.c index e869b4b9b1..6ad2c7da4d 100644 --- a/src/gallium/state_trackers/dri/common/dri_screen.c +++ b/src/gallium/state_trackers/dri/common/dri_screen.c @@ -34,7 +34,6 @@ #include "dri_screen.h" #include "dri_context.h" -#include "dri_drawable.h" #include "util/u_inlines.h" #include "pipe/p_screen.h" -- cgit v1.2.3 From 6c7e0b95a4a16d268aa3ca74c5c8407779533314 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Sat, 31 Jul 2010 00:00:49 +0800 Subject: egl: Add checks for EGL_MESA_screen_surface. This allows Mesa EGL to be compiled with eglext.h that does not define EGL_MESA_screen_surface. 
--- src/egl/main/eglconfig.c | 2 + src/egl/main/eglcurrent.c | 3 +- src/egl/main/egldriver.c | 1 - src/egl/main/eglglobals.c | 1 - src/egl/main/eglglobals.h | 2 - src/egl/main/eglmode.c | 6 +++ src/egl/main/eglmode.h | 6 +++ src/egl/main/eglscreen.c | 19 +++++-- src/egl/main/eglscreen.h | 6 +++ src/egl/main/eglsurface.c | 66 ++++++++++++++++++++----- src/gallium/state_trackers/egl/common/egl_g3d.c | 2 + src/gallium/state_trackers/egl/common/egl_g3d.h | 12 +++-- 12 files changed, 101 insertions(+), 25 deletions(-) (limited to 'src/gallium') diff --git a/src/egl/main/eglconfig.c b/src/egl/main/eglconfig.c index a9af320097..ea8e47d02b 100644 --- a/src/egl/main/eglconfig.c +++ b/src/egl/main/eglconfig.c @@ -314,8 +314,10 @@ _eglValidateConfig(const _EGLConfig *conf, EGLBoolean for_matching) EGL_VG_ALPHA_FORMAT_PRE_BIT | EGL_MULTISAMPLE_RESOLVE_BOX_BIT | EGL_SWAP_BEHAVIOR_PRESERVED_BIT; +#ifdef EGL_MESA_screen_surface if (conf->Display->Extensions.MESA_screen_surface) mask |= EGL_SCREEN_BIT_MESA; +#endif break; case EGL_RENDERABLE_TYPE: case EGL_CONFORMANT: diff --git a/src/egl/main/eglcurrent.c b/src/egl/main/eglcurrent.c index c697bf796d..a207159cd8 100644 --- a/src/egl/main/eglcurrent.c +++ b/src/egl/main/eglcurrent.c @@ -1,6 +1,5 @@ #include #include -#include "eglglobals.h" #include "egllog.h" #include "eglmutex.h" #include "eglcurrent.h" @@ -300,12 +299,14 @@ _eglError(EGLint errCode, const char *msg) case EGL_BAD_SURFACE: s = "EGL_BAD_SURFACE"; break; +#ifdef EGL_MESA_screen_surface case EGL_BAD_SCREEN_MESA: s = "EGL_BAD_SCREEN_MESA"; break; case EGL_BAD_MODE_MESA: s = "EGL_BAD_MODE_MESA"; break; +#endif default: s = "other"; } diff --git a/src/egl/main/egldriver.c b/src/egl/main/egldriver.c index 1e3d7d24aa..8fc9e792b0 100644 --- a/src/egl/main/egldriver.c +++ b/src/egl/main/egldriver.c @@ -14,7 +14,6 @@ #include "egldefines.h" #include "egldisplay.h" #include "egldriver.h" -#include "eglglobals.h" #include "egllog.h" #include "eglmisc.h" #include "eglmode.h" 
diff --git a/src/egl/main/eglglobals.c b/src/egl/main/eglglobals.c index 725a25eca6..52eebb07f6 100644 --- a/src/egl/main/eglglobals.c +++ b/src/egl/main/eglglobals.c @@ -11,7 +11,6 @@ struct _egl_global _eglGlobal = { &_eglGlobalMutex, /* Mutex */ NULL, /* DisplayList */ - 1, /* FreeScreenHandle */ 2, /* NumAtExitCalls */ { /* default AtExitCalls, called in reverse order */ diff --git a/src/egl/main/eglglobals.h b/src/egl/main/eglglobals.h index e8bf5416e2..c3771a8ef1 100644 --- a/src/egl/main/eglglobals.h +++ b/src/egl/main/eglglobals.h @@ -16,8 +16,6 @@ struct _egl_global /* the list of all displays */ _EGLDisplay *DisplayList; - EGLScreenMESA FreeScreenHandle; - EGLint NumAtExitCalls; void (*AtExitCalls[10])(void); }; diff --git a/src/egl/main/eglmode.c b/src/egl/main/eglmode.c index 859e9318b4..37594cdb42 100644 --- a/src/egl/main/eglmode.c +++ b/src/egl/main/eglmode.c @@ -10,6 +10,9 @@ #include "eglstring.h" +#ifdef EGL_MESA_screen_surface + + #define MIN2(A, B) (((A) < (B)) ? (A) : (B)) @@ -353,3 +356,6 @@ _eglQueryModeStringMESA(_EGLDriver *drv, _EGLDisplay *dpy, _EGLMode *m) { return m->Name; } + + +#endif /* EGL_MESA_screen_surface */ diff --git a/src/egl/main/eglmode.h b/src/egl/main/eglmode.h index a089a5e194..9167cbc4b9 100644 --- a/src/egl/main/eglmode.h +++ b/src/egl/main/eglmode.h @@ -4,6 +4,9 @@ #include "egltypedefs.h" +#ifdef EGL_MESA_screen_surface + + #define EGL_NO_MODE_MESA 0 @@ -54,4 +57,7 @@ extern const char * _eglQueryModeStringMESA(_EGLDriver *drv, _EGLDisplay *dpy, _EGLMode *m); +#endif /* EGL_MESA_screen_surface */ + + #endif /* EGLMODE_INCLUDED */ diff --git a/src/egl/main/eglscreen.c b/src/egl/main/eglscreen.c index 8f96fd935c..8b8966f3ff 100644 --- a/src/egl/main/eglscreen.c +++ b/src/egl/main/eglscreen.c @@ -16,7 +16,6 @@ #include #include "egldisplay.h" -#include "eglglobals.h" #include "eglcurrent.h" #include "eglmode.h" #include "eglconfig.h" @@ -25,6 +24,14 @@ #include "eglmutex.h" +#ifdef EGL_MESA_screen_surface + + +/* ugh, 
no atomic op? */ +static _EGL_DECLARE_MUTEX(_eglNextScreenHandleMutex); +static EGLScreenMESA _eglNextScreenHandle = 1; + + /** * Return a new screen handle/ID. * NOTE: we never reuse these! @@ -33,10 +40,10 @@ static EGLScreenMESA _eglAllocScreenHandle(void) { EGLScreenMESA s; - - _eglLockMutex(_eglGlobal.Mutex); - s = _eglGlobal.FreeScreenHandle++; - _eglUnlockMutex(_eglGlobal.Mutex); + + _eglLockMutex(&_eglNextScreenHandleMutex); + s = _eglNextScreenHandle++; + _eglUnlockMutex(&_eglNextScreenHandleMutex); return s; } @@ -263,3 +270,5 @@ _eglDestroyScreen(_EGLScreen *scrn) free(scrn); } + +#endif /* EGL_MESA_screen_surface */ diff --git a/src/egl/main/eglscreen.h b/src/egl/main/eglscreen.h index 0fd71f71fc..3db20478ad 100644 --- a/src/egl/main/eglscreen.h +++ b/src/egl/main/eglscreen.h @@ -5,6 +5,9 @@ #include "egltypedefs.h" +#ifdef EGL_MESA_screen_surface + + /** * Per-screen information. * Note that an EGL screen doesn't have a size. A screen may be set to @@ -86,4 +89,7 @@ PUBLIC void _eglDestroyScreen(_EGLScreen *scrn); +#endif /* EGL_MESA_screen_surface */ + + #endif /* EGLSCREEN_INCLUDED */ diff --git a/src/egl/main/eglsurface.c b/src/egl/main/eglsurface.c index d46bdb0672..52f5c240c6 100644 --- a/src/egl/main/eglsurface.c +++ b/src/egl/main/eglsurface.c @@ -30,6 +30,50 @@ _eglClampSwapInterval(_EGLSurface *surf, EGLint interval) } +#ifdef EGL_MESA_screen_surface +static EGLint +_eglParseScreenSurfaceAttribList(_EGLSurface *surf, const EGLint *attrib_list) +{ + EGLint i, err = EGL_SUCCESS; + + if (!attrib_list) + return EGL_SUCCESS; + + for (i = 0; attrib_list[i] != EGL_NONE; i++) { + EGLint attr = attrib_list[i++]; + EGLint val = attrib_list[i]; + + switch (attr) { + case EGL_WIDTH: + if (val < 0) { + err = EGL_BAD_PARAMETER; + break; + } + surf->Width = val; + break; + case EGL_HEIGHT: + if (val < 0) { + err = EGL_BAD_PARAMETER; + break; + } + surf->Height = val; + break; + default: + err = EGL_BAD_ATTRIBUTE; + break; + } + + if (err != EGL_SUCCESS) { + 
_eglLog(_EGL_WARNING, "bad surface attribute 0x%04x", attr); + break; + } + } + + return err; +} +#endif /* EGL_MESA_screen_surface */ + + /** * Parse the list of surface attributes and return the proper error code. */ @@ -44,6 +88,11 @@ _eglParseSurfaceAttribList(_EGLSurface *surf, const EGLint *attrib_list) if (!attrib_list) return EGL_SUCCESS; +#ifdef EGL_MESA_screen_surface + if (type == EGL_SCREEN_BIT_MESA) + return _eglParseScreenSurfaceAttribList(surf, attrib_list); +#endif + if (dpy->Extensions.NOK_texture_from_pixmap) texture_type |= EGL_PIXMAP_BIT; @@ -52,12 +101,8 @@ _eglParseSurfaceAttribList(_EGLSurface *surf, const EGLint *attrib_list) EGLint val = attrib_list[i]; switch (attr) { - /* common (except for screen surfaces) attributes */ + /* common attributes */ case EGL_VG_COLORSPACE: - if (type == EGL_SCREEN_BIT_MESA) { - err = EGL_BAD_ATTRIBUTE; - break; - } switch (val) { case EGL_VG_COLORSPACE_sRGB: case EGL_VG_COLORSPACE_LINEAR: @@ -71,10 +116,6 @@ _eglParseSurfaceAttribList(_EGLSurface *surf, const EGLint *attrib_list) surf->VGColorspace = val; break; case EGL_VG_ALPHA_FORMAT: - if (type == EGL_SCREEN_BIT_MESA) { - err = EGL_BAD_ATTRIBUTE; - break; - } switch (val) { case EGL_VG_ALPHA_FORMAT_NONPRE: case EGL_VG_ALPHA_FORMAT_PRE: @@ -101,7 +142,7 @@ _eglParseSurfaceAttribList(_EGLSurface *surf, const EGLint *attrib_list) break; /* pbuffer surface attributes */ case EGL_WIDTH: - if (type != EGL_PBUFFER_BIT && type != EGL_SCREEN_BIT_MESA) { + if (type != EGL_PBUFFER_BIT) { err = EGL_BAD_ATTRIBUTE; break; } @@ -112,7 +153,7 @@ _eglParseSurfaceAttribList(_EGLSurface *surf, const EGLint *attrib_list) surf->Width = val; break; case EGL_HEIGHT: - if (type != EGL_PBUFFER_BIT && type != EGL_SCREEN_BIT_MESA) { + if (type != EGL_PBUFFER_BIT) { err = EGL_BAD_ATTRIBUTE; break; } @@ -129,6 +170,7 @@ _eglParseSurfaceAttribList(_EGLSurface *surf, const EGLint *attrib_list) } surf->LargestPbuffer = !!val; break; + /* for eglBindTexImage */ case EGL_TEXTURE_FORMAT: 
if (!(type & texture_type)) { err = EGL_BAD_ATTRIBUTE; @@ -210,10 +252,12 @@ _eglInitSurface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type, case EGL_PBUFFER_BIT: func = "eglCreatePBufferSurface"; break; +#ifdef EGL_MESA_screen_surface case EGL_SCREEN_BIT_MESA: func = "eglCreateScreenSurface"; renderBuffer = EGL_SINGLE_BUFFER; /* XXX correct? */ break; +#endif default: _eglLog(_EGL_WARNING, "Bad type in _eglInitSurface"); return EGL_FALSE; diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.c b/src/gallium/state_trackers/egl/common/egl_g3d.c index b6321e6b43..56d575ffe0 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d.c @@ -434,9 +434,11 @@ egl_g3d_free_config(void *conf) static void egl_g3d_free_screen(void *scr) { +#ifdef EGL_MESA_screen_surface struct egl_g3d_screen *gscr = egl_g3d_screen((_EGLScreen *) scr); FREE(gscr->native_modes); FREE(gscr); +#endif } static EGLBoolean diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.h b/src/gallium/state_trackers/egl/common/egl_g3d.h index ed2b0409bb..f33dc91cf9 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d.h +++ b/src/gallium/state_trackers/egl/common/egl_g3d.h @@ -95,15 +95,19 @@ struct egl_g3d_image { unsigned zslice; }; +/* standard typecasts */ +_EGL_DRIVER_STANDARD_TYPECASTS(egl_g3d) +_EGL_DRIVER_TYPECAST(egl_g3d_image, _EGLImage, obj) + +#ifdef EGL_MESA_screen_surface + struct egl_g3d_screen { _EGLScreen base; const struct native_connector *native; const struct native_mode **native_modes; }; - -/* standard typecasts */ -_EGL_DRIVER_STANDARD_TYPECASTS(egl_g3d) _EGL_DRIVER_TYPECAST(egl_g3d_screen, _EGLScreen, obj) -_EGL_DRIVER_TYPECAST(egl_g3d_image, _EGLImage, obj) + +#endif /* EGL_MESA_screen_surface */ #endif /* _EGL_G3D_H_ */ -- cgit v1.2.3 From 5fa1af0905ef2f513fe296f1d8e658dbdf24dd74 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Sat, 31 Jul 2010 14:56:53 -0700 Subject: r600g: Remove unnecessary header. 
--- src/gallium/drivers/r600/r600_texture.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 96173b0ed6..3725bf8560 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -33,7 +33,6 @@ #include "r600_screen.h" #include "r600_context.h" #include "r600_resource.h" -#include "r600d.h" extern struct u_resource_vtbl r600_texture_vtbl; -- cgit v1.2.3 From 1d74a1674c0675dc59588167741c4abd2cc82cf9 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Sat, 31 Jul 2010 19:23:14 -0700 Subject: r600g: Remove unnecessary header. --- src/gallium/drivers/r600/r700_asm.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c index 3532ba5b0c..1ebe20d6ab 100644 --- a/src/gallium/drivers/r600/r700_asm.c +++ b/src/gallium/drivers/r600/r700_asm.c @@ -25,7 +25,6 @@ #include "util/u_memory.h" #include "r700_sq.h" #include -#include int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) { -- cgit v1.2.3 From a87cedbde0f6f1335691c524d539552f5c056215 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 1 Aug 2010 16:42:53 +0200 Subject: r300g: fix microtiling on RS6xx Getting tiling right has always been tricky. There are so many subtle details... 
--- src/gallium/drivers/r300/r300_texture_desc.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index 343089bf2c..5d690e8c33 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -136,13 +136,27 @@ static unsigned r300_texture_get_stride(struct r300_screen *screen, stride = util_format_get_stride(desc->b.b.format, width); - /* Some IGPs need a minimum stride of 64 bytes, hmm... - * This doesn't seem to apply to tiled textures, according to r300c. */ - if (!desc->microtile && !desc->macrotile[level] && + /* Some IGPs need a minimum stride of 64 bytes, hmm... */ + if (!desc->macrotile[level] && (screen->caps.family == CHIP_FAMILY_RS600 || screen->caps.family == CHIP_FAMILY_RS690 || screen->caps.family == CHIP_FAMILY_RS740)) { - return stride < 64 ? 64 : stride; + unsigned min_stride; + + if (desc->microtile) { + unsigned tile_height = + r300_get_pixel_alignment(desc->b.b.format, + desc->b.b.nr_samples, + desc->microtile, + desc->macrotile[level], + DIM_HEIGHT); + + min_stride = 64 / tile_height; + } else { + min_stride = 64; + } + + return stride < min_stride ? min_stride : stride; } /* The alignment to 32 bytes is sort of implied by the layout... */ -- cgit v1.2.3 From aef4500ca4ae9e5bc57560c6f9a32d9ad892975b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 2 Aug 2010 08:57:14 +1000 Subject: draw: fix warning in sse code. Not sure if this will actually fix the issue, but it fixes the warning. 
--- src/gallium/auxiliary/draw/draw_vs_sse.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 14c95082a9..0b0c6077c6 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -84,6 +84,7 @@ vs_sse_run_linear( struct draw_vertex_shader *base, const float (*input)[4], float (*output)[4], const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride ) -- cgit v1.2.3 From 919750a55771cfd731e2f90d1385cbb4265e4cf4 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 2 Aug 2010 14:31:49 +1000 Subject: r600g: make r600_db_format static. this isn't used anywhere else yet. --- src/gallium/drivers/r600/r600_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 0191070daa..b27d9d5557 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -667,7 +667,7 @@ static struct radeon_state *r600_cb0(struct r600_context *rctx) return rstate; } -int r600_db_format(unsigned pformat, unsigned *format) +static int r600_db_format(unsigned pformat, unsigned *format) { switch (pformat) { case PIPE_FORMAT_Z24X8_UNORM: -- cgit v1.2.3 From 01984cf34fa4e6d564d06a126795468b5718ecb0 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 2 Aug 2010 14:53:31 +1000 Subject: r600g: set correct tex coord type for rect textures. 
--- src/gallium/drivers/r600/r600_shader.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 8159cefd94..0582839905 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -952,10 +952,12 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.src_sel_y = 1; tex.src_sel_z = 2; tex.src_sel_w = 3; - tex.coord_type_x = 1; - tex.coord_type_y = 1; - tex.coord_type_z = 1; - tex.coord_type_w = 1; + if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { + tex.coord_type_x = 1; + tex.coord_type_y = 1; + tex.coord_type_z = 1; + tex.coord_type_w = 1; + } return r600_bc_add_tex(ctx->bc, &tex); } -- cgit v1.2.3 From 7bcd39ce50b7b710bb8561c430f345ebe91ab9a3 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 2 Aug 2010 14:42:29 +1000 Subject: r600g: add initial blend state. migrates cb_cntl to be regenerated --- src/gallium/drivers/r600/r600_context.c | 14 ---- src/gallium/drivers/r600/r600_context.h | 1 + src/gallium/drivers/r600/r600_reg.h | 72 +++++++++++++++++++ src/gallium/drivers/r600/r600_state.c | 95 ++++++++++++++++++++++-- src/gallium/drivers/r600/r600_state_inlines.h | 100 ++++++++++++++++++++++++++ 5 files changed, 262 insertions(+), 20 deletions(-) create mode 100644 src/gallium/drivers/r600/r600_reg.h create mode 100644 src/gallium/drivers/r600/r600_state_inlines.h (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index fc8aa1b866..8b191914f5 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -331,20 +331,6 @@ struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) return NULL; } - rctx->hw_states.cb_cntl = radeon_state(rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); - rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_SHADER_MASK] = 0x0000000F; 
- rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_TARGET_MASK] = 0x0000000F; - rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_COLOR_CONTROL] = 0x00CC0000; - rctx->hw_states.cb_cntl->states[R600_CB_CNTL__PA_SC_AA_CONFIG] = 0x00000000; - rctx->hw_states.cb_cntl->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX] = 0x00000000; - rctx->hw_states.cb_cntl->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX] = 0x00000000; - rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_CONTROL] = 0x01000000; - rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_SRC] = 0x00000000; - rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_DST] = 0x000000FF; - rctx->hw_states.cb_cntl->states[R600_CB_CNTL__CB_CLRCMP_MSK] = 0xFFFFFFFF; - rctx->hw_states.cb_cntl->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF; - radeon_state_pm4(rctx->hw_states.cb_cntl); - r600_init_config(rctx); rctx->ctx = radeon_ctx(rscreen->rw); diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index 97c8a46bb0..a1ee9577ba 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -158,6 +158,7 @@ struct r600_context { struct r600_vertex_element *vertex_elements; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; struct pipe_index_buffer index_buffer; + struct pipe_blend_color blend_color; }; #if 0 diff --git a/src/gallium/drivers/r600/r600_reg.h b/src/gallium/drivers/r600/r600_reg.h new file mode 100644 index 0000000000..2600875d5c --- /dev/null +++ b/src/gallium/drivers/r600/r600_reg.h @@ -0,0 +1,72 @@ +#ifndef R600_REG_H +#define R600_REG_H + +/* for regs which haven't been generated yet */ + +#define R600_BLEND_ZERO 0 +#define R600_BLEND_ONE 1 +#define R600_BLEND_SRC_COLOR 2 +#define R600_BLEND_ONE_MINUS_SRC_COLOR 3 +#define R600_BLEND_SRC_ALPHA 4 +#define R600_BLEND_ONE_MINUS_SRC_ALPHA 5 +#define R600_BLEND_DST_ALPHA 6 +#define R600_BLEND_ONE_MINUS_DST_ALPHA 7 +#define R600_BLEND_DST_COLOR 8 +#define 
R600_BLEND_ONE_MINUS_DST_COLOR 9 +#define R600_BLEND_SRC_ALPHA_SATURATE 10 +#define R600_BLEND_BOTH_SRC_ALPHA 11 +#define R600_BLEND_BOTH_INV_SRC_ALPHA 12 +#define R600_BLEND_CONST_COLOR 13 +#define R600_BLEND_ONE_MINUS_CONST_COLOR 14 +#define R600_BLEND_SRC1_COLOR 15 +#define R600_BLEND_INV_SRC1_COLOR 16 +#define R600_BLEND_SRC1_ALPHA 17 +#define R600_BLEND_INV_SRC1_ALPHA 18 +#define R600_BLEND_CONST_ALPHA 19 +#define R600_BLEND_ONE_MINUS_CONST_ALPHA 20 + +#define R600_BLEND_FCN_ADD 0 +#define R600_BLEND_FCN_SUBTRACT 1 +#define R600_BLEND_FCN_MIN 2 +#define R600_BLEND_FCN_MAX 3 +#define R600_BLEND_FCN_RSUB 4 + +#define CB_BLEND_COLOR_SRCBLEND_SHIFT 0 +#define CB_BLEND_COLOR_COMB_FCN_SHIFT 5 +#define CB_BLEND_COLOR_DESTBLEND_SHIFT 8 +#define CB_BLEND_ALPHA_SRCBLEND_SHIFT 16 +#define CB_BLEND_ALPHA_COMB_FCN_SHIFT 21 +#define CB_BLEND_ALPHA_DESTBLEND_SHIFT 24 +#define CB_BLEND_SEPARATE_ALPHA_BLEND (1 << 29) + +#define SX_ALPHA_TEST_FUNC_SHIFT (0) +#define SX_ALPHA_TEST_ENABLE (1 << 3) + +#define R600_ZS_KEEP 0 +#define R600_ZS_ZERO 1 +#define R600_ZS_REPLACE 2 +#define R600_ZS_INCR 3 +#define R600_ZS_DECR 4 +#define R600_ZS_INVERT 5 +#define R600_ZS_INCR_WRAP 6 +#define R600_ZS_DECR_WRAP 7 + +#define R600_STENCILREF_SHIFT 0 +#define R600_STENCILMASK_SHIFT 8 +#define R600_STENCILWRITEMASK_SHIFT 16 + +#define PA_SU_PS_WIDTH_SHIFT 16 + +#define PA_SU_CULL_FRONT (1 << 0) +#define PA_SU_CULL_BACK (1 << 1) +#define PA_SU_FACE_CCW (0 << 2) +#define PA_SU_FACE_CW (1 << 2) + +#define PA_SU_POLYMODE_FRONT_SHIFT 5 +#define PA_SU_POLYMODE_BACK_SHIFT 5 +#define POLYGON_MODE_POINT 0 +#define POLYGON_MODE_LINE 1 +#define POLYGON_MODE_TRI 2 + + +#endif diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index b27d9d5557..56304cc69f 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -32,6 +32,8 @@ #include "r600_context.h" #include "r600_resource.h" #include "r600d.h" +#include "r600_reg.h" 
+#include "r600_state_inlines.h" static void *r600_create_blend_state(struct pipe_context *ctx, const struct pipe_blend_state *state) @@ -259,6 +261,9 @@ static void r600_delete_state(struct pipe_context *ctx, void *state) static void r600_set_blend_color(struct pipe_context *ctx, const struct pipe_blend_color *color) { + struct r600_context *rctx = r600_context(ctx); + + rctx->blend_color = *color; } static void r600_set_clip_state(struct pipe_context *ctx, @@ -604,15 +609,17 @@ static struct radeon_state *r600_blend(struct r600_context *rctx) { struct r600_screen *rscreen = rctx->screen; struct radeon_state *rstate; + const struct pipe_blend_state *state = &rctx->blend->state.blend; + int i; rstate = radeon_state(rscreen->rw, R600_BLEND_TYPE, R600_BLEND); if (rstate == NULL) return NULL; - rstate->states[R600_BLEND__CB_BLEND_RED] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND_GREEN] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND_BLUE] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND_ALPHA] = 0x00000000; - rstate->states[R600_BLEND__CB_BLEND0_CONTROL] = 0x00010001; + rstate->states[R600_BLEND__CB_BLEND_RED] = fui(rctx->blend_color.color[0]); + rstate->states[R600_BLEND__CB_BLEND_GREEN] = fui(rctx->blend_color.color[1]); + rstate->states[R600_BLEND__CB_BLEND_BLUE] = fui(rctx->blend_color.color[2]); + rstate->states[R600_BLEND__CB_BLEND_ALPHA] = fui(rctx->blend_color.color[3]); + rstate->states[R600_BLEND__CB_BLEND0_CONTROL] = 0x00000000; rstate->states[R600_BLEND__CB_BLEND1_CONTROL] = 0x00000000; rstate->states[R600_BLEND__CB_BLEND2_CONTROL] = 0x00000000; rstate->states[R600_BLEND__CB_BLEND3_CONTROL] = 0x00000000; @@ -621,6 +628,37 @@ static struct radeon_state *r600_blend(struct r600_context *rctx) rstate->states[R600_BLEND__CB_BLEND6_CONTROL] = 0x00000000; rstate->states[R600_BLEND__CB_BLEND7_CONTROL] = 0x00000000; rstate->states[R600_BLEND__CB_BLEND_CONTROL] = 0x00000000; + + for (i = 0; i < 8; i++) { + + unsigned eqRGB = state->rt[i].rgb_func; + 
unsigned srcRGB = state->rt[i].rgb_src_factor; + unsigned dstRGB = state->rt[i].rgb_dst_factor; + + unsigned eqA = state->rt[i].alpha_func; + unsigned srcA = state->rt[i].alpha_src_factor; + unsigned dstA = state->rt[i].alpha_dst_factor; + uint32_t bc = 0; + + if (!state->rt[i].blend_enable) + continue; + + bc |= r600_translate_blend_function(eqRGB) << CB_BLEND_COLOR_COMB_FCN_SHIFT; + bc |= r600_translate_blend_factor(srcRGB) << CB_BLEND_COLOR_SRCBLEND_SHIFT; + bc |= r600_translate_blend_factor(dstRGB) << CB_BLEND_COLOR_DESTBLEND_SHIFT; + + if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { + bc |= CB_BLEND_SEPARATE_ALPHA_BLEND; + bc |= r600_translate_blend_function(eqA) << CB_BLEND_ALPHA_COMB_FCN_SHIFT; + bc |= r600_translate_blend_factor(srcA) << CB_BLEND_ALPHA_SRCBLEND_SHIFT; + bc |= r600_translate_blend_factor(dstA) << CB_BLEND_ALPHA_DESTBLEND_SHIFT; + } + + rstate->states[R600_BLEND__CB_BLEND0_CONTROL + i] = bc; + if (i == 0) + rstate->states[R600_BLEND__CB_BLEND_CONTROL] = bc; + } + if (radeon_state_pm4(rstate)) { radeon_state_decref(rstate); return NULL; @@ -1084,6 +1122,49 @@ static struct radeon_state *r600_resource(struct r600_context *rctx, return rstate; } +static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) +{ + struct r600_screen *rscreen = rctx->screen; + struct radeon_state *rstate; + const struct pipe_blend_state *pbs = &rctx->blend->state.blend; + uint32_t color_control, target_mask; + int i; + + target_mask = 0; + color_control = 0; + + if (pbs->logicop_enable) { + color_control |= (pbs->logicop_func) << 16; + } else + color_control |= (0xcc << 16); + + target_mask |= (pbs->rt[0].colormask); + for (i = 0; i < 8; i++) { + if (pbs->rt[i].blend_enable) { + color_control |= (1 << (8 + i)); + target_mask |= (pbs->rt[0].colormask << (4 * i)); + } else if (i == 0) + target_mask |= 0xf; + } + rstate = radeon_state(rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); + rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = 0x0000000F; + 
rstate->states[R600_CB_CNTL__CB_TARGET_MASK] = target_mask; + rstate->states[R600_CB_CNTL__CB_COLOR_CONTROL] = color_control; + rstate->states[R600_CB_CNTL__PA_SC_AA_CONFIG] = 0x00000000; + rstate->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_MCTX] = 0x00000000; + rstate->states[R600_CB_CNTL__PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX] = 0x00000000; + rstate->states[R600_CB_CNTL__CB_CLRCMP_CONTROL] = 0x01000000; + rstate->states[R600_CB_CNTL__CB_CLRCMP_SRC] = 0x00000000; + rstate->states[R600_CB_CNTL__CB_CLRCMP_DST] = 0x000000FF; + rstate->states[R600_CB_CNTL__CB_CLRCMP_MSK] = 0xFFFFFFFF; + rstate->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF; + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; +} + int r600_context_hw_states(struct r600_context *rctx) { unsigned i; @@ -1093,7 +1174,7 @@ int r600_context_hw_states(struct r600_context *rctx) * doesn't */ //radeon_state_decref(rctx->hw_states.config); - //radeon_state_decref(rctx->hw_states.cb_cntl); + radeon_state_decref(rctx->hw_states.cb_cntl); radeon_state_decref(rctx->hw_states.db); radeon_state_decref(rctx->hw_states.rasterizer); radeon_state_decref(rctx->hw_states.scissor); @@ -1120,6 +1201,8 @@ int r600_context_hw_states(struct r600_context *rctx) rctx->hw_states.viewport = r600_viewport(rctx); rctx->hw_states.cb0 = r600_cb0(rctx); rctx->hw_states.db = r600_db(rctx); + rctx->hw_states.cb_cntl = r600_cb_cntl(rctx); + for (i = 0; i < rctx->ps_nsampler; i++) { if (rctx->ps_sampler[i]) { rctx->hw_states.ps_sampler[i] = r600_sampler(rctx, diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h new file mode 100644 index 0000000000..42bab52b3f --- /dev/null +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -0,0 +1,100 @@ +/* + * Copyright 2010 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#ifndef R600_STATE_INLINES_H +#define R600_STATE_INLINES_H + +#include "r600_reg.h" + +static INLINE uint32_t r600_translate_blend_function(int blend_func) +{ + switch (blend_func) { + case PIPE_BLEND_ADD: + return R600_BLEND_FCN_ADD; + case PIPE_BLEND_SUBTRACT: + return R600_BLEND_FCN_SUBTRACT; + case PIPE_BLEND_REVERSE_SUBTRACT: + return R600_BLEND_FCN_RSUB; + case PIPE_BLEND_MIN: + return R600_BLEND_FCN_MIN; + case PIPE_BLEND_MAX: + return R600_BLEND_FCN_MAX; + default: + fprintf(stderr, "r600: Unknown blend function %d\n", blend_func); + assert(0); + break; + } + return 0; +} + +static INLINE uint32_t r600_translate_blend_factor(int blend_fact) +{ + switch (blend_fact) { + case PIPE_BLENDFACTOR_ONE: + return R600_BLEND_ZERO; + case PIPE_BLENDFACTOR_SRC_COLOR: + return R600_BLEND_SRC_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return R600_BLEND_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return R600_BLEND_DST_ALPHA; + case PIPE_BLENDFACTOR_DST_COLOR: + return R600_BLEND_DST_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return R600_BLEND_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return R600_BLEND_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return R600_BLEND_CONST_ALPHA; + case PIPE_BLENDFACTOR_ZERO: + return R600_BLEND_ZERO; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return R600_BLEND_ONE_MINUS_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return R600_BLEND_ONE_MINUS_SRC_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return R600_BLEND_ONE_MINUS_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return R600_BLEND_ONE_MINUS_DST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return R600_BLEND_ONE_MINUS_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return R600_BLEND_ONE_MINUS_CONST_ALPHA; + + case PIPE_BLENDFACTOR_SRC1_COLOR: + return R600_BLEND_SRC1_COLOR; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + return R600_BLEND_SRC1_ALPHA; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + return R600_BLEND_INV_SRC1_COLOR; + case 
PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + return R600_BLEND_INV_SRC1_ALPHA; + default: + fprintf(stderr, "r600: Implementation error: " + "Bad blend factor %d not supported!\n", blend_fact); + assert(0); + break; + } + return 0; +} + +#endif -- cgit v1.2.3 From 3f4ec394b027c6d947ccc88309a7d37bc3859e9d Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 2 Aug 2010 14:46:17 +1000 Subject: r600g: initial alpha test state --- src/gallium/drivers/r600/r600_state.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 56304cc69f..9af39f7218 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -873,19 +873,27 @@ static struct radeon_state *r600_dsa(struct r600_context *rctx) const struct pipe_depth_stencil_alpha_state *state = &rctx->dsa->state.dsa; struct r600_screen *rscreen = rctx->screen; struct radeon_state *rstate; - unsigned db_depth_control; - + unsigned db_depth_control, alpha_test_control, alpha_ref; + rstate = radeon_state(rscreen->rw, R600_DSA_TYPE, R600_DSA); if (rstate == NULL) return NULL; + db_depth_control = 0x00700700 | S_028800_Z_ENABLE(state->depth.enabled) | S_028800_Z_WRITE_ENABLE(state->depth.writemask) | S_028800_ZFUNC(state->depth.func); - + alpha_test_control = 0; + alpha_ref = 0; + if (state->alpha.enabled) { + alpha_test_control = (state->alpha.func) << 0; + alpha_test_control |= SX_ALPHA_TEST_ENABLE; + alpha_ref = fui(state->alpha.ref_value); + } + rstate->states[R600_DSA__DB_STENCIL_CLEAR] = 0x00000000; rstate->states[R600_DSA__DB_DEPTH_CLEAR] = 0x3F800000; - rstate->states[R600_DSA__SX_ALPHA_TEST_CONTROL] = 0x00000000; + rstate->states[R600_DSA__SX_ALPHA_TEST_CONTROL] = alpha_test_control; rstate->states[R600_DSA__DB_STENCILREFMASK] = 0xFFFFFF00; rstate->states[R600_DSA__DB_STENCILREFMASK_BF] = 0xFFFFFF00; - rstate->states[R600_DSA__SX_ALPHA_REF] = 0x00000000; + 
rstate->states[R600_DSA__SX_ALPHA_REF] = alpha_ref; rstate->states[R600_DSA__SPI_FOG_FUNC_SCALE] = 0x00000000; rstate->states[R600_DSA__SPI_FOG_FUNC_BIAS] = 0x00000000; rstate->states[R600_DSA__SPI_FOG_CNTL] = 0x00000000; -- cgit v1.2.3 From 4af5f11c3232015006f61c1a6befdff3411b8d6c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 2 Aug 2010 14:48:59 +1000 Subject: r600g: add stencil op/func translation --- src/gallium/drivers/r600/r600_state.c | 36 ++++++++++++++++++++++++--- src/gallium/drivers/r600/r600_state_inlines.h | 33 ++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 3 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 9af39f7218..bbfe4da840 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -874,12 +874,42 @@ static struct radeon_state *r600_dsa(struct r600_context *rctx) struct r600_screen *rscreen = rctx->screen; struct radeon_state *rstate; unsigned db_depth_control, alpha_test_control, alpha_ref; + unsigned stencil_ref_mask, stencil_ref_mask_bf; rstate = radeon_state(rscreen->rw, R600_DSA_TYPE, R600_DSA); if (rstate == NULL) return NULL; - db_depth_control = 0x00700700 | S_028800_Z_ENABLE(state->depth.enabled) | S_028800_Z_WRITE_ENABLE(state->depth.writemask) | S_028800_ZFUNC(state->depth.func); + stencil_ref_mask = 0; + stencil_ref_mask_bf = 0; + db_depth_control = 0x00700700 | + S_028800_Z_ENABLE(state->depth.enabled) | + S_028800_Z_WRITE_ENABLE(state->depth.writemask) | + S_028800_ZFUNC(state->depth.func); + /* set stencil enable */ + db_depth_control |= S_028800_STENCIL_ENABLE(state->stencil[0].enabled); + + if (state->stencil[0].enabled) { + + db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)); + db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)); + db_depth_control |= 
S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)); + db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); + + db_depth_control |= S_028800_BACKFACE_ENABLE(state->stencil[1].enabled); + + stencil_ref_mask = (state->stencil[0].valuemask << R600_STENCILMASK_SHIFT) | + (state->stencil[0].writemask << R600_STENCILWRITEMASK_SHIFT); + if (state->stencil[1].enabled) { + db_depth_control |= S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func)); + db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)); + db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)); + db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); + stencil_ref_mask_bf = (state->stencil[1].valuemask << R600_STENCILMASK_SHIFT) | + (state->stencil[1].writemask << R600_STENCILWRITEMASK_SHIFT); + } + } + alpha_test_control = 0; alpha_ref = 0; if (state->alpha.enabled) { @@ -891,8 +921,8 @@ static struct radeon_state *r600_dsa(struct r600_context *rctx) rstate->states[R600_DSA__DB_STENCIL_CLEAR] = 0x00000000; rstate->states[R600_DSA__DB_DEPTH_CLEAR] = 0x3F800000; rstate->states[R600_DSA__SX_ALPHA_TEST_CONTROL] = alpha_test_control; - rstate->states[R600_DSA__DB_STENCILREFMASK] = 0xFFFFFF00; - rstate->states[R600_DSA__DB_STENCILREFMASK_BF] = 0xFFFFFF00; + rstate->states[R600_DSA__DB_STENCILREFMASK] = stencil_ref_mask; + rstate->states[R600_DSA__DB_STENCILREFMASK_BF] = stencil_ref_mask_bf; rstate->states[R600_DSA__SX_ALPHA_REF] = alpha_ref; rstate->states[R600_DSA__SPI_FOG_FUNC_SCALE] = 0x00000000; rstate->states[R600_DSA__SPI_FOG_FUNC_BIAS] = 0x00000000; diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 42bab52b3f..369263dc8f 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ 
b/src/gallium/drivers/r600/r600_state_inlines.h @@ -97,4 +97,37 @@ static INLINE uint32_t r600_translate_blend_factor(int blend_fact) return 0; } +static INLINE uint32_t r600_translate_stencil_op(int s_op) +{ + switch (s_op) { + case PIPE_STENCIL_OP_KEEP: + return R600_ZS_KEEP; + case PIPE_STENCIL_OP_ZERO: + return R600_ZS_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return R600_ZS_REPLACE; + case PIPE_STENCIL_OP_INCR: + return R600_ZS_INCR; + case PIPE_STENCIL_OP_DECR: + return R600_ZS_DECR; + case PIPE_STENCIL_OP_INCR_WRAP: + return R600_ZS_INCR_WRAP; + case PIPE_STENCIL_OP_DECR_WRAP: + return R600_ZS_DECR_WRAP; + case PIPE_STENCIL_OP_INVERT: + return R600_ZS_INVERT; + default: + fprintf(stderr, "r600: Unknown stencil op %d", s_op); + assert(0); + break; + } + return 0; +} + +/* translates straight */ +static INLINE uint32_t r600_translate_ds_func(int func) +{ + return func; +} + #endif -- cgit v1.2.3 From c1f33097f4a6cd33df57dc601ba1733985979a4f Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 30 Jul 2010 16:59:05 +0200 Subject: util: Fix unpacking of R8G8Bx_SNORM format. Apparently, we must always use integers to perform calculations, otherwise the results won't match D3D's CxV8U8 definition. --- src/gallium/auxiliary/util/u_format_other.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_format_other.c b/src/gallium/auxiliary/util/u_format_other.c index 723fa8c3bf..fa42ec3713 100644 --- a/src/gallium/auxiliary/util/u_format_other.c +++ b/src/gallium/auxiliary/util/u_format_other.c @@ -121,6 +121,15 @@ util_format_r1_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, * A.k.a. D3DFMT_CxV8U8 */ +static uint8_t +r8g8bx_derive(int16_t r, int16_t g) +{ + /* Derive blue from red and green components. + * Apparently, we must always use integers to perform calculations, + * otherwise the results won't match D3D's CxV8U8 definition. 
+ */ + return (uint8_t)sqrtf(0x7f * 0x7f - r * r - g * g) * 0xff / 0x7f; +} void util_format_r8g8bx_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, @@ -145,7 +154,7 @@ util_format_r8g8bx_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, dst[0] = (float)(r * (1.0f/0x7f)); /* r */ dst[1] = (float)(g * (1.0f/0x7f)); /* g */ - dst[2] = sqrtf(1.0f - dst[0] * dst[0] - dst[1] * dst[1]); /* b */ + dst[2] = r8g8bx_derive(r, g) * (1.0f/0xff); /* b */ dst[3] = 1.0f; /* a */ dst += 4; } @@ -177,7 +186,7 @@ util_format_r8g8bx_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_strid dst[0] = (uint8_t)(((uint16_t)MAX2(r, 0)) * 0xff / 0x7f); /* r */ dst[1] = (uint8_t)(((uint16_t)MAX2(g, 0)) * 0xff / 0x7f); /* g */ - dst[2] = (uint8_t)sqrtf(0x7f*0x7f - r * r - g * g) * 0xff / 0x7f; /* b */ + dst[2] = r8g8bx_derive(r, g); /* b */ dst[3] = 255; /* a */ dst += 4; } @@ -262,6 +271,6 @@ util_format_r8g8bx_snorm_fetch_rgba_float(float *dst, const uint8_t *src, dst[0] = r * (1.0f/0x7f); /* r */ dst[1] = g * (1.0f/0x7f); /* g */ - dst[2] = sqrtf(1.0f - dst[0] * dst[0] - dst[1] * dst[1]); /* b */ + dst[2] = r8g8bx_derive(r, g) * (1.0f/0xff); /* b */ dst[3] = 1.0f; /* a */ } -- cgit v1.2.3 From afa925066c158ac49e3b0f883f67debd8545bf26 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 2 Aug 2010 17:14:07 +0200 Subject: r300g: fix hardlock when using more than one stuffed sprite coords If texture coordinates come from the vertex shader, there are always 4 components in the rasterizer input packet, but if the coordinates are stuffed (like for point sprites), there are only 2 or 3 components (based on GB_ENABLE) and if we rasterize more, it locks up. 
--- src/gallium/drivers/r300/r300_state.c | 2 +- src/gallium/drivers/r300/r300_state_derived.c | 38 ++++++++++++++------------- 2 files changed, 21 insertions(+), 19 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index fced77e6f1..bc0c86d8a4 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1014,7 +1014,7 @@ static void* r300_create_rs_state(struct pipe_context* pipe, for (i = 0; i < 8; i++) { if (state->sprite_coord_enable & (1 << i)) stuffing_enable |= - R300_GB_TEX_STR << (R300_GB_TEX0_SOURCE_SHIFT + (i*2)); + R300_GB_TEX_ST << (R300_GB_TEX0_SOURCE_SHIFT + (i*2)); } point_texcoord_left = 0.0f; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index a85db27064..66f64f0f6a 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -213,19 +213,19 @@ static void r300_rs_tex(struct r300_rs_block* rs, int id, int ptr, enum r300_rs_swizzle swiz) { if (swiz == SWIZ_X001) { - rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) | + rs->ip[id] |= R300_RS_TEX_PTR(ptr) | R300_RS_SEL_S(R300_RS_SEL_C0) | R300_RS_SEL_T(R300_RS_SEL_K0) | R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1); } else if (swiz == SWIZ_XY01) { - rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) | + rs->ip[id] |= R300_RS_TEX_PTR(ptr) | R300_RS_SEL_S(R300_RS_SEL_C0) | R300_RS_SEL_T(R300_RS_SEL_C1) | R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1); } else { - rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) | + rs->ip[id] |= R300_RS_TEX_PTR(ptr) | R300_RS_SEL_S(R300_RS_SEL_C0) | R300_RS_SEL_T(R300_RS_SEL_C1) | R300_RS_SEL_R(R300_RS_SEL_C2) | @@ -261,23 +261,21 @@ static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset) static void r500_rs_tex(struct r300_rs_block* rs, int id, int ptr, enum r300_rs_swizzle swiz) { - int rs_tex_comp = ptr*4; - if (swiz == 
SWIZ_X001) { - rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) | + rs->ip[id] |= R500_RS_SEL_S(ptr) | R500_RS_SEL_T(R500_RS_IP_PTR_K0) | R500_RS_SEL_R(R500_RS_IP_PTR_K0) | R500_RS_SEL_Q(R500_RS_IP_PTR_K1); } else if (swiz == SWIZ_XY01) { - rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) | - R500_RS_SEL_T(rs_tex_comp + 1) | + rs->ip[id] |= R500_RS_SEL_S(ptr) | + R500_RS_SEL_T(ptr + 1) | R500_RS_SEL_R(R500_RS_IP_PTR_K0) | R500_RS_SEL_Q(R500_RS_IP_PTR_K1); } else { - rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) | - R500_RS_SEL_T(rs_tex_comp + 1) | - R500_RS_SEL_R(rs_tex_comp + 2) | - R500_RS_SEL_Q(rs_tex_comp + 3); + rs->ip[id] |= R500_RS_SEL_S(ptr) | + R500_RS_SEL_T(ptr + 1) | + R500_RS_SEL_R(ptr + 2) | + R500_RS_SEL_Q(ptr + 3); } rs->inst[id] |= R500_RS_INST_TEX_ID(id); } @@ -305,7 +303,7 @@ static void r300_update_rs_block(struct r300_context *r300) struct r300_shader_semantics *vs_outputs = &vs->outputs; struct r300_shader_semantics *fs_inputs = &r300_fs(r300)->shader->inputs; struct r300_rs_block rs = {0}; - int i, col_count = 0, tex_count = 0, fp_offset = 0, count, loc = 0; + int i, col_count = 0, tex_count = 0, fp_offset = 0, count, loc = 0, tex_ptr = 0; void (*rX00_rs_col)(struct r300_rs_block*, int, int, enum r300_rs_swizzle); void (*rX00_rs_col_write)(struct r300_rs_block*, int, int); void (*rX00_rs_tex)(struct r300_rs_block*, int, int, enum r300_rs_swizzle); @@ -393,8 +391,9 @@ static void r300_update_rs_block(struct r300_context *r300) stream_loc_notcl[loc++] = 6 + tex_count; /* Rasterize it. */ - rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_XYZW); + rX00_rs_tex(&rs, tex_count, tex_ptr, SWIZ_XYZW); tex_count++; + tex_ptr += 4; } } } @@ -412,7 +411,7 @@ static void r300_update_rs_block(struct r300_context *r300) } /* Rasterize it. */ - rX00_rs_tex(&rs, tex_count, tex_count, + rX00_rs_tex(&rs, tex_count, tex_ptr, sprite_coord ? SWIZ_XY01 : SWIZ_XYZW); /* Write it to the FS input register if it's needed by the FS. 
*/ @@ -429,6 +428,7 @@ static void r300_update_rs_block(struct r300_context *r300) i, sprite_coord ? " (sprite coord)" : ""); } tex_count++; + tex_ptr += sprite_coord ? 2 : 4; } else { /* Skip the FS input register, leave it uninitialized. */ /* If we try to set it to (0,0,0,1), it will lock up. */ @@ -449,7 +449,7 @@ static void r300_update_rs_block(struct r300_context *r300) stream_loc_notcl[loc++] = 6 + tex_count; /* Rasterize it. */ - rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_X001); + rX00_rs_tex(&rs, tex_count, tex_ptr, SWIZ_X001); /* Write it to the FS input register if it's needed by the FS. */ if (fs_inputs->fog != ATTR_UNUSED) { @@ -461,6 +461,7 @@ static void r300_update_rs_block(struct r300_context *r300) DBG(r300, DBG_RS, "r300: Rasterized fog unused.\n"); } tex_count++; + tex_ptr += 4; } else { /* Skip the FS input register, leave it uninitialized. */ /* If we try to set it to (0,0,0,1), it will lock up. */ @@ -480,7 +481,7 @@ static void r300_update_rs_block(struct r300_context *r300) stream_loc_notcl[loc++] = 6 + tex_count; /* Rasterize it. */ - rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_XYZW); + rX00_rs_tex(&rs, tex_count, tex_ptr, SWIZ_XYZW); /* Write it to the FS input register. */ rX00_rs_tex_write(&rs, tex_count, fp_offset); @@ -489,6 +490,7 @@ static void r300_update_rs_block(struct r300_context *r300) fp_offset++; tex_count++; + tex_ptr += 4; } /* Invalidate the rest of the no-TCL (GA) stream locations. 
*/ @@ -507,7 +509,7 @@ static void r300_update_rs_block(struct r300_context *r300) DBG(r300, DBG_RS, "r300: --- Rasterizer status ---: colors: %i, " "generics: %i.\n", col_count, tex_count); - rs.count = (tex_count*4) | (col_count << R300_IC_COUNT_SHIFT) | + rs.count = MIN2(tex_ptr, 32) | (col_count << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; count = MAX3(col_count, tex_count, 1); -- cgit v1.2.3 From 313df4156279f84ebc5b98a7540820b994762650 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Mon, 2 Aug 2010 14:21:33 -0400 Subject: r600g: add autogenerated reg definition + debug print cleanup Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_reg.h | 72 ------------ src/gallium/drivers/r600/r600_screen.c | 6 +- src/gallium/drivers/r600/r600_shader.c | 2 +- src/gallium/drivers/r600/r600_state.c | 38 +++---- src/gallium/drivers/r600/r600_state_inlines.h | 152 +++++++++++++------------- src/gallium/drivers/r600/r600d.h | 97 ++++++++++++++++ 6 files changed, 194 insertions(+), 173 deletions(-) delete mode 100644 src/gallium/drivers/r600/r600_reg.h (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_reg.h b/src/gallium/drivers/r600/r600_reg.h deleted file mode 100644 index 2600875d5c..0000000000 --- a/src/gallium/drivers/r600/r600_reg.h +++ /dev/null @@ -1,72 +0,0 @@ -#ifndef R600_REG_H -#define R600_REG_H - -/* for regs which haven't been generated yet */ - -#define R600_BLEND_ZERO 0 -#define R600_BLEND_ONE 1 -#define R600_BLEND_SRC_COLOR 2 -#define R600_BLEND_ONE_MINUS_SRC_COLOR 3 -#define R600_BLEND_SRC_ALPHA 4 -#define R600_BLEND_ONE_MINUS_SRC_ALPHA 5 -#define R600_BLEND_DST_ALPHA 6 -#define R600_BLEND_ONE_MINUS_DST_ALPHA 7 -#define R600_BLEND_DST_COLOR 8 -#define R600_BLEND_ONE_MINUS_DST_COLOR 9 -#define R600_BLEND_SRC_ALPHA_SATURATE 10 -#define R600_BLEND_BOTH_SRC_ALPHA 11 -#define R600_BLEND_BOTH_INV_SRC_ALPHA 12 -#define R600_BLEND_CONST_COLOR 13 -#define R600_BLEND_ONE_MINUS_CONST_COLOR 14 -#define R600_BLEND_SRC1_COLOR 15 -#define 
R600_BLEND_INV_SRC1_COLOR 16 -#define R600_BLEND_SRC1_ALPHA 17 -#define R600_BLEND_INV_SRC1_ALPHA 18 -#define R600_BLEND_CONST_ALPHA 19 -#define R600_BLEND_ONE_MINUS_CONST_ALPHA 20 - -#define R600_BLEND_FCN_ADD 0 -#define R600_BLEND_FCN_SUBTRACT 1 -#define R600_BLEND_FCN_MIN 2 -#define R600_BLEND_FCN_MAX 3 -#define R600_BLEND_FCN_RSUB 4 - -#define CB_BLEND_COLOR_SRCBLEND_SHIFT 0 -#define CB_BLEND_COLOR_COMB_FCN_SHIFT 5 -#define CB_BLEND_COLOR_DESTBLEND_SHIFT 8 -#define CB_BLEND_ALPHA_SRCBLEND_SHIFT 16 -#define CB_BLEND_ALPHA_COMB_FCN_SHIFT 21 -#define CB_BLEND_ALPHA_DESTBLEND_SHIFT 24 -#define CB_BLEND_SEPARATE_ALPHA_BLEND (1 << 29) - -#define SX_ALPHA_TEST_FUNC_SHIFT (0) -#define SX_ALPHA_TEST_ENABLE (1 << 3) - -#define R600_ZS_KEEP 0 -#define R600_ZS_ZERO 1 -#define R600_ZS_REPLACE 2 -#define R600_ZS_INCR 3 -#define R600_ZS_DECR 4 -#define R600_ZS_INVERT 5 -#define R600_ZS_INCR_WRAP 6 -#define R600_ZS_DECR_WRAP 7 - -#define R600_STENCILREF_SHIFT 0 -#define R600_STENCILMASK_SHIFT 8 -#define R600_STENCILWRITEMASK_SHIFT 16 - -#define PA_SU_PS_WIDTH_SHIFT 16 - -#define PA_SU_CULL_FRONT (1 << 0) -#define PA_SU_CULL_BACK (1 << 1) -#define PA_SU_FACE_CCW (0 << 2) -#define PA_SU_FACE_CW (1 << 2) - -#define PA_SU_POLYMODE_FRONT_SHIFT 5 -#define PA_SU_POLYMODE_BACK_SHIFT 5 -#define POLYGON_MODE_POINT 0 -#define POLYGON_MODE_LINE 1 -#define POLYGON_MODE_TRI 2 - - -#endif diff --git a/src/gallium/drivers/r600/r600_screen.c b/src/gallium/drivers/r600/r600_screen.c index e0d74ca558..68615ca162 100644 --- a/src/gallium/drivers/r600/r600_screen.c +++ b/src/gallium/drivers/r600/r600_screen.c @@ -106,7 +106,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: return 0; default: - debug_printf("r600: unknown param %d\n", param); + R600_ERR("r600: unknown param %d\n", param); return 0; } } @@ -124,7 +124,7 @@ static float r600_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param) case 
PIPE_CAP_MAX_TEXTURE_LOD_BIAS: return 16.0f; default: - debug_printf("r600: unsupported paramf %d\n", param); + R600_ERR("r600: unsupported paramf %d\n", param); return 0.0f; } } @@ -137,7 +137,7 @@ static boolean r600_is_format_supported(struct pipe_screen* screen, unsigned geom_flags) { if (target >= PIPE_MAX_TEXTURE_TYPES) { - debug_printf("r600: unsupported texture type %d\n", target); + R600_ERR("r600: unsupported texture type %d\n", target); return FALSE; } switch (format) { diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 0582839905..34c6a444a3 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -129,7 +129,7 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_shader *rshader = &rpshader->shader; struct radeon_state *state; - unsigned i, j, tmp; + unsigned i, tmp; rpshader->rstate = radeon_state_decref(rpshader->rstate); state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER); diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index bbfe4da840..60270e18a6 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -32,7 +32,6 @@ #include "r600_context.h" #include "r600_resource.h" #include "r600d.h" -#include "r600_reg.h" #include "r600_state_inlines.h" static void *r600_create_blend_state(struct pipe_context *ctx, @@ -292,7 +291,7 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, type = R600_PS_CONSTANT_TYPE; break; default: - fprintf(stderr, "%s:%d unsupported %d\n", __func__, __LINE__, shader); + R600_ERR("unsupported %d\n", shader); return; } if (buffer && buffer->width0 > 0) { @@ -418,8 +417,7 @@ static void r600_set_index_buffer(struct pipe_context *ctx, if (ib) { pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer); 
memcpy(&rctx->index_buffer, ib, sizeof(rctx->index_buffer)); - } - else { + } else { pipe_resource_reference(&rctx->index_buffer.buffer, NULL); memset(&rctx->index_buffer, 0, sizeof(rctx->index_buffer)); } @@ -643,15 +641,15 @@ static struct radeon_state *r600_blend(struct r600_context *rctx) if (!state->rt[i].blend_enable) continue; - bc |= r600_translate_blend_function(eqRGB) << CB_BLEND_COLOR_COMB_FCN_SHIFT; - bc |= r600_translate_blend_factor(srcRGB) << CB_BLEND_COLOR_SRCBLEND_SHIFT; - bc |= r600_translate_blend_factor(dstRGB) << CB_BLEND_COLOR_DESTBLEND_SHIFT; + bc |= S_028804_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB)); + bc |= S_028804_COLOR_SRCBLEND(r600_translate_blend_factor(srcRGB)); + bc |= S_028804_COLOR_DESTBLEND(r600_translate_blend_factor(dstRGB)); if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { - bc |= CB_BLEND_SEPARATE_ALPHA_BLEND; - bc |= r600_translate_blend_function(eqA) << CB_BLEND_ALPHA_COMB_FCN_SHIFT; - bc |= r600_translate_blend_factor(srcA) << CB_BLEND_ALPHA_SRCBLEND_SHIFT; - bc |= r600_translate_blend_factor(dstA) << CB_BLEND_ALPHA_DESTBLEND_SHIFT; + bc |= S_028804_SEPARATE_ALPHA_BLEND(1); + bc |= S_028804_ALPHA_COMB_FCN(r600_translate_blend_function(eqA)); + bc |= S_028804_ALPHA_SRCBLEND(r600_translate_blend_factor(srcA)); + bc |= S_028804_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA)); } rstate->states[R600_BLEND__CB_BLEND0_CONTROL + i] = bc; @@ -895,26 +893,25 @@ static struct radeon_state *r600_dsa(struct r600_context *rctx) db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)); db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)); db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); - db_depth_control |= S_028800_BACKFACE_ENABLE(state->stencil[1].enabled); - stencil_ref_mask = (state->stencil[0].valuemask << R600_STENCILMASK_SHIFT) | - (state->stencil[0].writemask << 
R600_STENCILWRITEMASK_SHIFT); + stencil_ref_mask = S_028430_STENCILMASK(state->stencil[0].valuemask) | + S_028430_STENCILWRITEMASK(state->stencil[0].writemask); if (state->stencil[1].enabled) { db_depth_control |= S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func)); db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)); db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)); db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); - stencil_ref_mask_bf = (state->stencil[1].valuemask << R600_STENCILMASK_SHIFT) | - (state->stencil[1].writemask << R600_STENCILWRITEMASK_SHIFT); + stencil_ref_mask_bf = S_028434_STENCILMASK_BF(state->stencil[1].valuemask) | + S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask); } } alpha_test_control = 0; alpha_ref = 0; if (state->alpha.enabled) { - alpha_test_control = (state->alpha.func) << 0; - alpha_test_control |= SX_ALPHA_TEST_ENABLE; + alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func); + alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1); alpha_ref = fui(state->alpha.ref_value); } @@ -1108,7 +1105,10 @@ static struct radeon_state *r600_resource(struct r600_context *rctx, if (r600_conv_pipe_format(view->texture->format, &format)) return NULL; desc = util_format_description(view->texture->format); - assert(desc == NULL); + if (desc == NULL) { + R600_ERR("unknow format %d\n", view->texture->format); + return NULL; + } rstate = radeon_state(rscreen->rw, R600_PS_RESOURCE_TYPE, id); if (rstate == NULL) { return NULL; diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 369263dc8f..26a5dd0432 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -18,79 +18,75 @@ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, * DAMAGES OR OTHER 
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ #ifndef R600_STATE_INLINES_H #define R600_STATE_INLINES_H -#include "r600_reg.h" - static INLINE uint32_t r600_translate_blend_function(int blend_func) { switch (blend_func) { - case PIPE_BLEND_ADD: - return R600_BLEND_FCN_ADD; - case PIPE_BLEND_SUBTRACT: - return R600_BLEND_FCN_SUBTRACT; - case PIPE_BLEND_REVERSE_SUBTRACT: - return R600_BLEND_FCN_RSUB; - case PIPE_BLEND_MIN: - return R600_BLEND_FCN_MIN; - case PIPE_BLEND_MAX: - return R600_BLEND_FCN_MAX; - default: - fprintf(stderr, "r600: Unknown blend function %d\n", blend_func); - assert(0); - break; - } - return 0; + case PIPE_BLEND_ADD: + return V_028804_COMB_DST_PLUS_SRC; + case PIPE_BLEND_SUBTRACT: + return V_028804_COMB_SRC_MINUS_DST; + case PIPE_BLEND_REVERSE_SUBTRACT: + return V_028804_COMB_DST_MINUS_SRC; + case PIPE_BLEND_MIN: + return V_028804_COMB_MIN_DST_SRC; + case PIPE_BLEND_MAX: + return V_028804_COMB_MAX_DST_SRC; + default: + R600_ERR("Unknown blend function %d\n", blend_func); + assert(0); + break; + } + return 0; } static INLINE uint32_t r600_translate_blend_factor(int blend_fact) { switch (blend_fact) { - case PIPE_BLENDFACTOR_ONE: - return R600_BLEND_ZERO; - case PIPE_BLENDFACTOR_SRC_COLOR: - return R600_BLEND_SRC_COLOR; - case PIPE_BLENDFACTOR_SRC_ALPHA: - return R600_BLEND_SRC_ALPHA; - case PIPE_BLENDFACTOR_DST_ALPHA: - return R600_BLEND_DST_ALPHA; - case PIPE_BLENDFACTOR_DST_COLOR: - return R600_BLEND_DST_COLOR; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - return R600_BLEND_SRC_ALPHA_SATURATE; - case PIPE_BLENDFACTOR_CONST_COLOR: - return R600_BLEND_CONST_COLOR; - case PIPE_BLENDFACTOR_CONST_ALPHA: - return R600_BLEND_CONST_ALPHA; - case PIPE_BLENDFACTOR_ZERO: - return R600_BLEND_ZERO; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - return R600_BLEND_ONE_MINUS_SRC_COLOR; - 
case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - return R600_BLEND_ONE_MINUS_SRC_ALPHA; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - return R600_BLEND_ONE_MINUS_DST_ALPHA; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - return R600_BLEND_ONE_MINUS_DST_COLOR; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - return R600_BLEND_ONE_MINUS_CONST_COLOR; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - return R600_BLEND_ONE_MINUS_CONST_ALPHA; - - case PIPE_BLENDFACTOR_SRC1_COLOR: - return R600_BLEND_SRC1_COLOR; - case PIPE_BLENDFACTOR_SRC1_ALPHA: - return R600_BLEND_SRC1_ALPHA; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - return R600_BLEND_INV_SRC1_COLOR; - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - return R600_BLEND_INV_SRC1_ALPHA; + case PIPE_BLENDFACTOR_ONE: + return V_028804_BLEND_ZERO; + case PIPE_BLENDFACTOR_SRC_COLOR: + return V_028804_BLEND_SRC_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return V_028804_BLEND_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return V_028804_BLEND_DST_ALPHA; + case PIPE_BLENDFACTOR_DST_COLOR: + return V_028804_BLEND_DST_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return V_028804_BLEND_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return V_028804_BLEND_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return V_028804_BLEND_CONST_ALPHA; + case PIPE_BLENDFACTOR_ZERO: + return V_028804_BLEND_ZERO; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return V_028804_BLEND_ONE_MINUS_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return V_028804_BLEND_ONE_MINUS_SRC_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return V_028804_BLEND_ONE_MINUS_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return V_028804_BLEND_ONE_MINUS_DST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return V_028804_BLEND_ONE_MINUS_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return V_028804_BLEND_ONE_MINUS_CONST_ALPHA; + case PIPE_BLENDFACTOR_SRC1_COLOR: + return V_028804_BLEND_SRC1_COLOR; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + return V_028804_BLEND_SRC1_ALPHA; + case 
PIPE_BLENDFACTOR_INV_SRC1_COLOR: + return V_028804_BLEND_INV_SRC1_COLOR; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + return V_028804_BLEND_INV_SRC1_ALPHA; default: - fprintf(stderr, "r600: Implementation error: " - "Bad blend factor %d not supported!\n", blend_fact); + R600_ERR("Bad blend factor %d not supported!\n", blend_fact); assert(0); break; } @@ -101,23 +97,23 @@ static INLINE uint32_t r600_translate_stencil_op(int s_op) { switch (s_op) { case PIPE_STENCIL_OP_KEEP: - return R600_ZS_KEEP; - case PIPE_STENCIL_OP_ZERO: - return R600_ZS_ZERO; - case PIPE_STENCIL_OP_REPLACE: - return R600_ZS_REPLACE; - case PIPE_STENCIL_OP_INCR: - return R600_ZS_INCR; - case PIPE_STENCIL_OP_DECR: - return R600_ZS_DECR; - case PIPE_STENCIL_OP_INCR_WRAP: - return R600_ZS_INCR_WRAP; - case PIPE_STENCIL_OP_DECR_WRAP: - return R600_ZS_DECR_WRAP; - case PIPE_STENCIL_OP_INVERT: - return R600_ZS_INVERT; - default: - fprintf(stderr, "r600: Unknown stencil op %d", s_op); + return V_028800_STENCIL_KEEP; + case PIPE_STENCIL_OP_ZERO: + return V_028800_STENCIL_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return V_028800_STENCIL_REPLACE; + case PIPE_STENCIL_OP_INCR: + return V_028800_STENCIL_INCR; + case PIPE_STENCIL_OP_DECR: + return V_028800_STENCIL_DECR; + case PIPE_STENCIL_OP_INCR_WRAP: + return V_028800_STENCIL_INVERT; + case PIPE_STENCIL_OP_DECR_WRAP: + return V_028800_STENCIL_DECR_WRAP; + case PIPE_STENCIL_OP_INVERT: + return V_028800_STENCIL_INVERT; + default: + R600_ERR("Unknown stencil op %d", s_op); assert(0); break; } diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index c1acfcd29e..6085596ea8 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -249,6 +249,16 @@ #define S_028060_SLICE_TILE_MAX(x) (((x) & 0xFFFFF) << 10) #define G_028060_SLICE_TILE_MAX(x) (((x) >> 10) & 0xFFFFF) #define C_028060_SLICE_TILE_MAX 0xC00003FF +#define R_028410_SX_ALPHA_TEST_CONTROL 0x028410 +#define S_028410_ALPHA_FUNC(x) (((x) & 0x7) << 0) 
+#define G_028410_ALPHA_FUNC(x) (((x) >> 0) & 0x7) +#define C_028410_ALPHA_FUNC 0xFFFFFFF8 +#define S_028410_ALPHA_TEST_ENABLE(x) (((x) & 0x1) << 3) +#define G_028410_ALPHA_TEST_ENABLE(x) (((x) >> 3) & 0x1) +#define C_028410_ALPHA_TEST_ENABLE 0xFFFFFFF7 +#define S_028410_ALPHA_TEST_BYPASS(x) (((x) & 0x1) << 8) +#define G_028410_ALPHA_TEST_BYPASS(x) (((x) >> 8) & 0x1) +#define C_028410_ALPHA_TEST_BYPASS 0xFFFFFEFF #define R_028800_DB_DEPTH_CONTROL 0x028800 #define S_028800_STENCIL_ENABLE(x) (((x) & 0x1) << 0) #define G_028800_STENCIL_ENABLE(x) (((x) >> 0) & 0x1) @@ -268,9 +278,25 @@ #define S_028800_STENCILFUNC(x) (((x) & 0x7) << 8) #define G_028800_STENCILFUNC(x) (((x) >> 8) & 0x7) #define C_028800_STENCILFUNC 0xFFFFF8FF +#define V_028800_STENCILFUNC_NEVER 0x00000000 +#define V_028800_STENCILFUNC_LESS 0x00000001 +#define V_028800_STENCILFUNC_EQUAL 0x00000002 +#define V_028800_STENCILFUNC_LEQUAL 0x00000003 +#define V_028800_STENCILFUNC_GREATER 0x00000004 +#define V_028800_STENCILFUNC_NOTEQUAL 0x00000005 +#define V_028800_STENCILFUNC_GEQUAL 0x00000006 +#define V_028800_STENCILFUNC_ALWAYS 0x00000007 #define S_028800_STENCILFAIL(x) (((x) & 0x7) << 11) #define G_028800_STENCILFAIL(x) (((x) >> 11) & 0x7) #define C_028800_STENCILFAIL 0xFFFFC7FF +#define V_028800_STENCIL_KEEP 0x00000000 +#define V_028800_STENCIL_ZERO 0x00000001 +#define V_028800_STENCIL_REPLACE 0x00000002 +#define V_028800_STENCIL_INCR 0x00000003 +#define V_028800_STENCIL_DECR 0x00000004 +#define V_028800_STENCIL_INVERT 0x00000005 +#define V_028800_STENCIL_INCR_WRAP 0x00000006 +#define V_028800_STENCIL_DECR_WRAP 0x00000007 #define S_028800_STENCILZPASS(x) (((x) & 0x7) << 14) #define G_028800_STENCILZPASS(x) (((x) >> 14) & 0x7) #define C_028800_STENCILZPASS 0xFFFE3FFF @@ -316,6 +342,77 @@ #define S_028010_ZRANGE_PRECISION(x) (((x) & 0x1) << 31) #define G_028010_ZRANGE_PRECISION(x) (((x) >> 31) & 0x1) #define C_028010_ZRANGE_PRECISION 0x7FFFFFFF +#define R_028430_DB_STENCILREFMASK 0x028430 +#define 
S_028430_STENCILREF(x) (((x) & 0xFF) << 0) +#define G_028430_STENCILREF(x) (((x) >> 0) & 0xFF) +#define C_028430_STENCILREF 0xFFFFFF00 +#define S_028430_STENCILMASK(x) (((x) & 0xFF) << 8) +#define G_028430_STENCILMASK(x) (((x) >> 8) & 0xFF) +#define C_028430_STENCILMASK 0xFFFF00FF +#define S_028430_STENCILWRITEMASK(x) (((x) & 0xFF) << 16) +#define G_028430_STENCILWRITEMASK(x) (((x) >> 16) & 0xFF) +#define C_028430_STENCILWRITEMASK 0xFF00FFFF +#define R_028434_DB_STENCILREFMASK_BF 0x028434 +#define S_028434_STENCILREF_BF(x) (((x) & 0xFF) << 0) +#define G_028434_STENCILREF_BF(x) (((x) >> 0) & 0xFF) +#define C_028434_STENCILREF_BF 0xFFFFFF00 +#define S_028434_STENCILMASK_BF(x) (((x) & 0xFF) << 8) +#define G_028434_STENCILMASK_BF(x) (((x) >> 8) & 0xFF) +#define C_028434_STENCILMASK_BF 0xFFFF00FF +#define S_028434_STENCILWRITEMASK_BF(x) (((x) & 0xFF) << 16) +#define G_028434_STENCILWRITEMASK_BF(x) (((x) >> 16) & 0xFF) +#define C_028434_STENCILWRITEMASK_BF 0xFF00FFFF +#define R_028804_CB_BLEND_CONTROL 0x028804 +#define S_028804_COLOR_SRCBLEND(x) (((x) & 0x1F) << 0) +#define G_028804_COLOR_SRCBLEND(x) (((x) >> 0) & 0x1F) +#define C_028804_COLOR_SRCBLEND 0xFFFFFFE0 +#define V_028804_BLEND_ZERO 0x00000000 +#define V_028804_BLEND_ONE 0x00000001 +#define V_028804_BLEND_SRC_COLOR 0x00000002 +#define V_028804_BLEND_ONE_MINUS_SRC_COLOR 0x00000003 +#define V_028804_BLEND_SRC_ALPHA 0x00000004 +#define V_028804_BLEND_ONE_MINUS_SRC_ALPHA 0x00000005 +#define V_028804_BLEND_DST_ALPHA 0x00000006 +#define V_028804_BLEND_ONE_MINUS_DST_ALPHA 0x00000007 +#define V_028804_BLEND_DST_COLOR 0x00000008 +#define V_028804_BLEND_ONE_MINUS_DST_COLOR 0x00000009 +#define V_028804_BLEND_SRC_ALPHA_SATURATE 0x0000000A +#define V_028804_BLEND_BOTH_SRC_ALPHA 0x0000000B +#define V_028804_BLEND_BOTH_INV_SRC_ALPHA 0x0000000C +#define V_028804_BLEND_CONST_COLOR 0x0000000D +#define V_028804_BLEND_ONE_MINUS_CONST_COLOR 0x0000000E +#define V_028804_BLEND_SRC1_COLOR 0x0000000F +#define 
V_028804_BLEND_INV_SRC1_COLOR 0x00000010 +#define V_028804_BLEND_SRC1_ALPHA 0x00000011 +#define V_028804_BLEND_INV_SRC1_ALPHA 0x00000012 +#define V_028804_BLEND_CONST_ALPHA 0x00000013 +#define V_028804_BLEND_ONE_MINUS_CONST_ALPHA 0x00000014 +#define S_028804_COLOR_COMB_FCN(x) (((x) & 0x7) << 5) +#define G_028804_COLOR_COMB_FCN(x) (((x) >> 5) & 0x7) +#define C_028804_COLOR_COMB_FCN 0xFFFFFF1F +#define V_028804_COMB_DST_PLUS_SRC 0x00000000 +#define V_028804_COMB_SRC_MINUS_DST 0x00000001 +#define V_028804_COMB_MIN_DST_SRC 0x00000002 +#define V_028804_COMB_MAX_DST_SRC 0x00000003 +#define V_028804_COMB_DST_MINUS_SRC 0x00000004 +#define S_028804_COLOR_DESTBLEND(x) (((x) & 0x1F) << 8) +#define G_028804_COLOR_DESTBLEND(x) (((x) >> 8) & 0x1F) +#define C_028804_COLOR_DESTBLEND 0xFFFFE0FF +#define S_028804_OPACITY_WEIGHT(x) (((x) & 0x1) << 13) +#define G_028804_OPACITY_WEIGHT(x) (((x) >> 13) & 0x1) +#define C_028804_OPACITY_WEIGHT 0xFFFFDFFF +#define S_028804_ALPHA_SRCBLEND(x) (((x) & 0x1F) << 16) +#define G_028804_ALPHA_SRCBLEND(x) (((x) >> 16) & 0x1F) +#define C_028804_ALPHA_SRCBLEND 0xFFE0FFFF +#define S_028804_ALPHA_COMB_FCN(x) (((x) & 0x7) << 21) +#define G_028804_ALPHA_COMB_FCN(x) (((x) >> 21) & 0x7) +#define C_028804_ALPHA_COMB_FCN 0xFF1FFFFF +#define S_028804_ALPHA_DESTBLEND(x) (((x) & 0x1F) << 24) +#define G_028804_ALPHA_DESTBLEND(x) (((x) >> 24) & 0x1F) +#define C_028804_ALPHA_DESTBLEND 0xE0FFFFFF +#define S_028804_SEPARATE_ALPHA_BLEND(x) (((x) & 0x1) << 29) +#define G_028804_SEPARATE_ALPHA_BLEND(x) (((x) >> 29) & 0x1) +#define C_028804_SEPARATE_ALPHA_BLEND 0xDFFFFFFF #define R_028814_PA_SU_SC_MODE_CNTL 0x028814 #define S_028814_CULL_FRONT(x) (((x) & 0x1) << 0) #define G_028814_CULL_FRONT(x) (((x) >> 0) & 0x1) -- cgit v1.2.3 From f031817450fe75d3224f767d79938813287ac445 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Mon, 2 Aug 2010 17:41:52 -0400 Subject: r600g: split alu block to conform to limit + RCP opcode Signed-off-by: Jerome Glisse --- 
src/gallium/drivers/r600/r600_asm.c | 4 +++- src/gallium/drivers/r600/r600_shader.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index e560f65dcd..386adde6b8 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -118,7 +118,9 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) nalu->nliteral = 0; /* cf can contains only alu or only vtx or only tex */ - if (bc->cf_last == NULL || bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) { + if (bc->cf_last == NULL || bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) || + (bc->cf_last->ndw >> 1) >= 120) { + /* at most 128 slots, one add alu can add 4 slots + 4 constant worst case */ r = r600_bc_add_cf(bc); if (r) { free(nalu); diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 34c6a444a3..5bb16bbd3e 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1044,7 +1044,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, - {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans}, {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans}, {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, -- cgit v1.2.3 From 57438adf3217955f16491ef8deeffafe05c2f7f8 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 3 Aug 2010 14:15:24 +0200 Subject: r300g: handle polygon offset correctly 
https://bugs.freedesktop.org/show_bug.cgi?id=29372 --- src/gallium/drivers/r300/r300_state.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index bc0c86d8a4..a3383c3878 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -744,7 +744,7 @@ static void r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE); /* Polygon offset depends on the zbuffer bit depth. */ - if (state->zsbuf && r300->polygon_offset_enabled) { + if (state->zsbuf) { switch (util_format_get_blocksize(state->zsbuf->texture->format)) { case 2: zbuffer_bpp = 16; @@ -756,7 +756,9 @@ static void if (r300->zbuffer_bpp != zbuffer_bpp) { r300->zbuffer_bpp = zbuffer_bpp; - r300->rs_state.dirty = TRUE; + + if (r300->polygon_offset_enabled) + r300->rs_state.dirty = TRUE; } } @@ -1095,9 +1097,7 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) } if (rs) { - r300->polygon_offset_enabled = (rs->rs.offset_point || - rs->rs.offset_line || - rs->rs.offset_tri); + r300->polygon_offset_enabled = rs->polygon_offset_enable; r300->sprite_coord_enable = rs->rs.sprite_coord_enable; r300->two_sided_color = rs->rs.light_twoside; } else { -- cgit v1.2.3 From 7e42b7e5d2aebcda0e6bf081b6661411731e6df2 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Tue, 3 Aug 2010 14:14:58 -0400 Subject: r600g: fix LIT + fix multiple constant one ALU + fix ALU block splitting Make sure LIT fills all slot for instruction (can't do W instruction without having the Z slot filled with at least a NOP). ALU instruction can't access more than 4 constant, move constant to temporary reg if we reach the limit. Fix ALU block splitting, only split ALU after ALU with last instruction bit sets. 
Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/Makefile | 2 +- src/gallium/drivers/r600/r600_asm.c | 6 +- src/gallium/drivers/r600/r600_asm.h | 1 + src/gallium/drivers/r600/r600_context.h | 4 +- src/gallium/drivers/r600/r600_shader.c | 307 ++++++++++++++++++++------------ 5 files changed, 207 insertions(+), 113 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile index 8f1e1366b5..fc94ae71f4 100644 --- a/src/gallium/drivers/r600/Makefile +++ b/src/gallium/drivers/r600/Makefile @@ -9,6 +9,7 @@ LIBRARY_INCLUDES = \ C_SOURCES = \ r600_buffer.c \ r600_context.c \ + r600_shader.c \ r600_draw.c \ r600_blit.c \ r600_helper.c \ @@ -17,7 +18,6 @@ C_SOURCES = \ r600_screen.c \ r600_state.c \ r600_texture.c \ - r600_shader.c \ r600_asm.c \ r700_asm.c diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 386adde6b8..f1dc3dc3a9 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -91,6 +91,7 @@ static int r600_bc_add_cf(struct r600_bc *bc) bc->cf_last = cf; bc->ncf++; bc->ndw += 2; + bc->force_add_cf = 0; return 0; } @@ -119,7 +120,7 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) /* cf can contains only alu or only vtx or only tex */ if (bc->cf_last == NULL || bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) || - (bc->cf_last->ndw >> 1) >= 120) { + bc->force_add_cf) { /* at most 128 slots, one add alu can add 4 slots + 4 constant worst case */ r = r600_bc_add_cf(bc); if (r) { @@ -128,6 +129,9 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) } bc->cf_last->inst = V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3; } + if (alu->last && (bc->cf_last->ndw >> 1) >= 124) { + bc->force_add_cf = 1; + } /* number of gpr == the last gpr used in any alu */ for (i = 0; i < 3; i++) { if (alu->src[i].sel >= bc->ngpr && alu->src[i].sel < 128) { diff --git 
a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 88fb957440..3fd94dbda0 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -127,6 +127,7 @@ struct r600_bc { unsigned ncf; unsigned ngpr; unsigned nresource; + unsigned force_add_cf; u32 *bytecode; }; diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index a1ee9577ba..f8fdce50dc 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -200,10 +200,10 @@ void r600_init_state_functions(struct r600_context *rctx); void r600_init_query_functions(struct r600_context* rctx); struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv); -int r600_pipe_shader_create(struct pipe_context *ctx, +extern int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_context_state *rstate, const struct tgsi_token *tokens); -int r600_pipe_shader_update(struct pipe_context *ctx, +extern int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rstate); #define R600_ERR(fmt, args...) 
\ diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 5bb16bbd3e..7d304f5ae8 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -469,30 +469,14 @@ static int tgsi_end(struct r600_shader_ctx *ctx) static int tgsi_src(struct r600_shader_ctx *ctx, const struct tgsi_full_src_register *tgsi_src, - unsigned swizzle, struct r600_bc_alu_src *r600_src) { + memset(r600_src, 0, sizeof(struct r600_bc_alu_src)); r600_src->sel = tgsi_src->Register.Index; if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { r600_src->sel = 0; } r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; - switch (swizzle) { - case 0: - r600_src->chan = tgsi_src->Register.SwizzleX; - break; - case 1: - r600_src->chan = tgsi_src->Register.SwizzleY; - break; - case 2: - r600_src->chan = tgsi_src->Register.SwizzleZ; - break; - case 3: - r600_src->chan = tgsi_src->Register.SwizzleW; - break; - default: - return -EINVAL; - } return 0; } @@ -513,12 +497,70 @@ static int tgsi_dst(struct r600_shader_ctx *ctx, return 0; } +static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle) +{ + switch (swizzle) { + case 0: + return tgsi_src->Register.SwizzleX; + case 1: + return tgsi_src->Register.SwizzleY; + case 2: + return tgsi_src->Register.SwizzleZ; + case 3: + return tgsi_src->Register.SwizzleW; + default: + return 0; + } +} + +static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, k, nconst, r; + + for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { + if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { + nconst++; + } + r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]); + if (r) { + return r; + } + } + for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { + if (inst->Src[j].Register.File == 
TGSI_FILE_CONSTANT && j > 0) { + for (k = 0; k < 4; k++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.src[0].sel = r600_src[0].sel; + alu.src[0].chan = k; + alu.dst.sel = ctx->temp_reg + j; + alu.dst.chan = k; + alu.dst.write = 1; + if (k == 3) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + r600_src[0].sel = ctx->temp_reg + j; + j--; + } + } + return 0; +} + static int tgsi_op2(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; + r = tgsi_split_constant(ctx, r600_src); + if (r) + return r; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { @@ -527,9 +569,8 @@ static int tgsi_op2(struct r600_shader_ctx *ctx) } else { alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]); - if (r) - return r; + alu.src[j] = r600_src[j]; + alu.src[j].chan = tgsi_chan(&inst->Src[j], i); } r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (r) @@ -567,9 +608,10 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) alu.inst = ctx->inst_info->r600_opcode; alu.dst.chan = i; alu.src[0].sel = 248; - r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]); + r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); if (r) return r; + alu.src[1].chan = tgsi_chan(&inst->Src[0], i); if (i == 3) { alu.last = 1; } @@ -583,9 +625,13 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) static int tgsi_slt(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, r; + r = tgsi_split_constant(ctx, r600_src); + if (r) + return r; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); if 
(!(inst->Dst[0].Register.WriteMask & (1 << i))) { @@ -593,12 +639,10 @@ static int tgsi_slt(struct r600_shader_ctx *ctx) alu.dst.chan = i; } else { alu.inst = ctx->inst_info->r600_opcode; - r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]); - if (r) - return r; - r = tgsi_src(ctx, &inst->Src[1], i, &alu.src[0]); - if (r) - return r; + alu.src[1] = r600_src[0]; + alu.src[1].chan = tgsi_chan(&inst->Src[0], i); + alu.src[0] = r600_src[1]; + alu.src[0].chan = tgsi_chan(&inst->Src[1], i); r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (r) return r; @@ -619,60 +663,56 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) struct r600_bc_alu alu; int r; - if (inst->Dst[0].Register.WriteMask & (1 << 0)) - { - /* dst.x, <- 1.0 */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; - alu.src[0].sel = 249; /*1.0*/ - alu.src[0].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); - if (r) - return r; - if ((inst->Dst[0].Register.WriteMask & 0xe) == 0) - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - } + /* dst.x, <- 1.0 */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.src[0].sel = 249; /*1.0*/ + alu.src[0].chan = 0; + r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); + if (r) + return r; + alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + /* dst.y = max(src.x, 0.0) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX; + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); + if (r) + return r; + alu.src[1].sel = 248; /*0.0*/ + alu.src[1].chan = tgsi_chan(&inst->Src[0], 0); + r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); + if (r) + return r; + alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; - if (inst->Dst[0].Register.WriteMask & (1 << 1)) - { - /* dst.y = max(src.x, 0.0) 
*/ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX; - r = tgsi_src(ctx, &inst->Src[0], 0, &alu.src[0]); - if (r) - return r; - alu.src[1].sel = 248; /*0.0*/ - alu.src[1].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); - if (r) - return r; - if ((inst->Dst[0].Register.WriteMask & 0xa) == 0) - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - } + /* dst.z = NOP - fill Z slot */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; + alu.dst.chan = 2; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; - if (inst->Dst[0].Register.WriteMask & (1 << 3)) - { - /* dst.w, <- 1.0 */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; - alu.src[0].sel = 249; - alu.src[0].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); - if (r) - return r; - if ((inst->Dst[0].Register.WriteMask & 0x4) == 0) - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - } + /* dst.w, <- 1.0 */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.src[0].sel = 249; + alu.src[0].chan = 0; + r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); + if (r) + return r; + alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; if (inst->Dst[0].Register.WriteMask & (1 << 2)) { @@ -682,9 +722,10 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* dst.z = log(src.y) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED; - r = tgsi_src(ctx, &inst->Src[0], 1, &alu.src[0]); + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); if (r) return r; + alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); if (r) return r; @@ -699,14 +740,16 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* tmp.x = amd 
MUL_LIT(src.w, dst.z, src.x ) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT; - r = tgsi_src(ctx, &inst->Src[0], 3, &alu.src[0]); + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); if (r) - return r; + return r; + alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); alu.src[1].sel = sel; alu.src[1].chan = chan; - r = tgsi_src(ctx, &inst->Src[0], 0, &alu.src[2]); + r = tgsi_src(ctx, &inst->Src[0], &alu.src[2]); if (r) return r; + alu.src[2].chan = tgsi_chan(&inst->Src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; alu.dst.write = 1; @@ -743,9 +786,10 @@ static int tgsi_trans(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & (1 << i)) { alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]); + r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]); if (r) return r; + alu.src[j].chan = tgsi_chan(&inst->Src[j], i); } r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (r) @@ -759,6 +803,45 @@ static int tgsi_trans(struct r600_shader_ctx *ctx) return 0; } +static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, r; + + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = ctx->inst_info->r600_opcode; + for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { + r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]); + if (r) + return r; + alu.src[j].chan = tgsi_chan(&inst->Src[j], 0); + } + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + /* replicate result */ + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.src[0].sel = ctx->temp_reg; + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.dst.chan = i; + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + alu.dst.write = 
(inst->Dst[0].Register.WriteMask >> i) & 1; + if (i == 3) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) { struct r600_bc_alu alu; @@ -793,17 +876,20 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru static int tgsi_op3(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; + r = tgsi_split_constant(ctx, r600_src); + if (r) + return r; /* do it in 2 step as op3 doesn't support writemask */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]); - if (r) - return r; + alu.src[j] = r600_src[j]; + alu.src[j].chan = tgsi_chan(&inst->Src[j], i); } alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -822,16 +908,19 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) static int tgsi_dp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; + r = tgsi_split_constant(ctx, r600_src); + if (r) + return r; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]); - if (r) - return r; + alu.src[j] = r600_src[j]; + alu.src[j].chan = tgsi_chan(&inst->Src[j], i); } alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -878,7 +967,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE; alu.src[0].sel = src_gpr; - alu.src[0].chan = 3; + alu.src[0].chan = 
tgsi_chan(&inst->Src[0], 3); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 3; alu.last = 1; @@ -892,7 +981,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 3; alu.src[1].sel = src_gpr; - alu.src[1].chan = 0; + alu.src[1].chan = tgsi_chan(&inst->Src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; alu.dst.write = 1; @@ -904,7 +993,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 3; alu.src[1].sel = src_gpr; - alu.src[1].chan = 1; + alu.src[1].chan = tgsi_chan(&inst->Src[0], 1); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 1; alu.dst.write = 1; @@ -916,7 +1005,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 3; alu.src[1].sel = src_gpr; - alu.src[1].chan = 2; + alu.src[1].chan = tgsi_chan(&inst->Src[0], 2); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 2; alu.dst.write = 1; @@ -955,7 +1044,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { tex.coord_type_x = 1; tex.coord_type_y = 1; - tex.coord_type_z = 1; + tex.coord_type_z = 1; tex.coord_type_w = 1; } return r600_bc_add_tex(ctx->bc, &tex); @@ -964,19 +1053,22 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) static int tgsi_lrp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; unsigned i; int r; + r = tgsi_split_constant(ctx, r600_src); + if (r) + return r; /* 1 - src0 */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD; alu.src[0].sel = 249; alu.src[0].chan = 0; - r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]); - if (r) - return r; + alu.src[1] = r600_src[0]; + alu.src[1].chan = tgsi_chan(&inst->Src[0], i); alu.src[1].neg = 1; alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -998,9 +1090,8 @@ static int 
tgsi_lrp(struct r600_shader_ctx *ctx) alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = i; - r = tgsi_src(ctx, &inst->Src[2], i, &alu.src[1]); - if (r) - return r; + alu.src[1] = r600_src[2]; + alu.src[1].chan = tgsi_chan(&inst->Src[2], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) { @@ -1020,12 +1111,10 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD; alu.is_op3 = 1; - r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[0]); - if (r) - return r; - r = tgsi_src(ctx, &inst->Src[1], i, &alu.src[1]); - if (r) - return r; + alu.src[0] = r600_src[0]; + alu.src[0].chan = tgsi_chan(&inst->Src[0], i); + alu.src[1] = r600_src[1]; + alu.src[1].chan = tgsi_chan(&inst->Src[1], i); alu.src[2].sel = ctx->temp_reg; alu.src[2].chan = i; alu.dst.sel = ctx->temp_reg; @@ -1044,8 +1133,8 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, - {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans}, - {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans}, + {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, + {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, @@ -1071,7 +1160,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_FLR, 0, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans}, + {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, -- cgit v1.2.3 From 8f6341d42131e6f60f269610d62b7f5b7b683052 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Tue, 3 Aug 2010 15:49:21 -0400 Subject: r600g: fix stencil Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_blit.c | 2 -- src/gallium/drivers/r600/r600_state.c | 13 +++++++------ 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index cc37227ead..f4eedfe4cb 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -86,7 +86,6 @@ static void r600_clear_render_target(struct pipe_context *pipe, util_blitter_clear_render_target(rctx->blitter, dst, rgba, dstx, dsty, width, height); -R600_ERR("vtx elem %p\n", rctx->vertex_elements); } static void r600_clear_depth_stencil(struct pipe_context *pipe, @@ -105,7 +104,6 @@ static void r600_clear_depth_stencil(struct pipe_context *pipe, util_blitter_clear_depth_stencil(rctx->blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height); -R600_ERR("vtx elem %p\n", rctx->vertex_elements); } static void r600_resource_copy_region(struct pipe_context *pipe, diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 60270e18a6..f687d31e4b 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -869,6 +869,7 @@ static struct radeon_state 
*r600_viewport(struct r600_context *rctx) static struct radeon_state *r600_dsa(struct r600_context *rctx) { const struct pipe_depth_stencil_alpha_state *state = &rctx->dsa->state.dsa; + const struct pipe_stencil_ref *stencil_ref = &rctx->stencil_ref->state.stencil_ref; struct r600_screen *rscreen = rctx->screen; struct radeon_state *rstate; unsigned db_depth_control, alpha_test_control, alpha_ref; @@ -880,30 +881,30 @@ static struct radeon_state *r600_dsa(struct r600_context *rctx) stencil_ref_mask = 0; stencil_ref_mask_bf = 0; - db_depth_control = 0x00700700 | - S_028800_Z_ENABLE(state->depth.enabled) | + db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | S_028800_Z_WRITE_ENABLE(state->depth.writemask) | S_028800_ZFUNC(state->depth.func); /* set stencil enable */ - db_depth_control |= S_028800_STENCIL_ENABLE(state->stencil[0].enabled); - - if (state->stencil[0].enabled) { + if (state->stencil[0].enabled) { + db_depth_control |= S_028800_STENCIL_ENABLE(1); db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)); db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op)); db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op)); db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op)); - db_depth_control |= S_028800_BACKFACE_ENABLE(state->stencil[1].enabled); stencil_ref_mask = S_028430_STENCILMASK(state->stencil[0].valuemask) | S_028430_STENCILWRITEMASK(state->stencil[0].writemask); + stencil_ref_mask |= S_028430_STENCILREF(stencil_ref->ref_value[0]); if (state->stencil[1].enabled) { + db_depth_control |= S_028800_BACKFACE_ENABLE(1); db_depth_control |= S_028800_STENCILFUNC_BF(r600_translate_ds_func(state->stencil[1].func)); db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op)); db_depth_control |= 
S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op)); db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op)); stencil_ref_mask_bf = S_028434_STENCILMASK_BF(state->stencil[1].valuemask) | S_028434_STENCILWRITEMASK_BF(state->stencil[1].writemask); + stencil_ref_mask_bf |= S_028430_STENCILREF(stencil_ref->ref_value[1]); } } -- cgit v1.2.3 From 1fd84b10f269a32db66254aa567b1aad8b152fe8 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 3 Aug 2010 15:56:17 -0400 Subject: gallium/util: add extra primitives to the trimmer --- src/gallium/auxiliary/util/u_prim.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h index 606b9b5c6b..63ddc86475 100644 --- a/src/gallium/auxiliary/util/u_prim.h +++ b/src/gallium/auxiliary/util/u_prim.h @@ -108,6 +108,19 @@ static INLINE boolean u_trim_pipe_prim( unsigned pipe_prim, unsigned *nr ) ok = (*nr >= 4); *nr -= (*nr % 2); break; + case PIPE_PRIM_LINES_ADJACENCY: + ok = (*nr >= 4); + *nr -= (*nr % 4); + break; + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + break; + case PIPE_PRIM_TRIANGLES_ADJACENCY: + ok = (*nr >= 6); + *nr -= (*nr % 5); + break; + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + ok = (*nr >= 4); + break; default: ok = 0; break; -- cgit v1.2.3 From 6f9d3516a4f5b8abc9e8510dbccd93bf7874e6da Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 3 Aug 2010 16:02:09 -0400 Subject: gallium/draw: forgot about PIPE_PRIM_LINE_STRIP_ADJACENCY --- src/gallium/auxiliary/util/u_prim.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h index 63ddc86475..3c851f7340 100644 --- a/src/gallium/auxiliary/util/u_prim.h +++ b/src/gallium/auxiliary/util/u_prim.h @@ -113,6 +113,7 @@ static INLINE boolean u_trim_pipe_prim( unsigned pipe_prim, unsigned *nr ) *nr -= (*nr % 4); 
break; case PIPE_PRIM_LINE_STRIP_ADJACENCY: + ok = (*nr >= 4); break; case PIPE_PRIM_TRIANGLES_ADJACENCY: ok = (*nr >= 6); -- cgit v1.2.3 From 3a8d4a89795d180b910b2c0dfa98c57cf9bb45a6 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Tue, 3 Aug 2010 16:15:17 -0400 Subject: r600g: fix color target mask Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_state.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index f687d31e4b..f0abafd6c2 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1182,8 +1182,7 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) if (pbs->rt[i].blend_enable) { color_control |= (1 << (8 + i)); target_mask |= (pbs->rt[0].colormask << (4 * i)); - } else if (i == 0) - target_mask |= 0xf; + } } rstate = radeon_state(rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = 0x0000000F; -- cgit v1.2.3 From a0b3944c30fdecf5ef97fd602b8a286bcc80fed3 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Tue, 3 Aug 2010 17:02:48 -0400 Subject: r600g: flush and resubmit if we reach limit Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_context.c | 2 +- src/gallium/drivers/r600/r600_context.h | 38 ++------------------------------- src/gallium/drivers/r600/r600_draw.c | 4 ++++ src/gallium/drivers/r600/r600_helper.c | 6 ++++++ 4 files changed, 13 insertions(+), 37 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index 8b191914f5..ae1780a1d4 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -41,7 +41,7 @@ static void r600_destroy_context(struct pipe_context *context) FREE(rctx); } -static void r600_flush(struct pipe_context *ctx, unsigned flags, +void r600_flush(struct 
pipe_context *ctx, unsigned flags, struct pipe_fence_handle **fence) { struct r600_context *rctx = r600_context(ctx); diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index f8fdce50dc..8d102b6850 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -113,23 +113,6 @@ struct r600_context { struct radeon_draw *draw; /* hw states */ struct r600_context_hw_states hw_states; -#if 0 - struct r600_pipe_shader *ps_shader; - struct r600_pipe_shader *vs_shader; - unsigned nps_sampler; - struct radeon_state *ps_sampler[PIPE_MAX_ATTRIBS]; - unsigned nps_view; - unsigned nvs_view; - struct r600_texture_resource *ps_view[PIPE_MAX_ATTRIBS]; - struct r600_texture_resource *vs_view[PIPE_MAX_ATTRIBS]; - unsigned flat_shade; - unsigned nvertex_buffer; - struct r600_vertex_elements_state *vertex_elements; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - struct pipe_stencil_ref stencil_ref; - struct pipe_framebuffer_state fb_state; - struct pipe_viewport_state viewport; -#endif /* pipe states */ unsigned flat_shade; unsigned ps_nsampler; @@ -161,25 +144,6 @@ struct r600_context { struct pipe_blend_color blend_color; }; -#if 0 -struct r600_vertex_elements_state -{ - unsigned count; - struct pipe_vertex_element elements[32]; -}; - -struct r600_pipe_shader { - struct r600_shader shader; - struct radeon_bo *bo; - struct radeon_state *state; -}; - -struct r600_texture_resource { - struct pipe_sampler_view view; - struct radeon_state *state; -}; -#endif - /* Convenience cast wrapper. 
*/ static INLINE struct r600_context *r600_context(struct pipe_context *pipe) { @@ -189,6 +153,8 @@ static INLINE struct r600_context *r600_context(struct pipe_context *pipe) struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigned type, const void *state); struct r600_context_state *r600_context_state_incref(struct r600_context_state *rstate); struct r600_context_state *r600_context_state_decref(struct r600_context_state *rstate); +void r600_flush(struct pipe_context *ctx, unsigned flags, + struct pipe_fence_handle **fence); int r600_context_hw_states(struct r600_context *rctx); diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c index c52dfd3fb3..43c805b982 100644 --- a/src/gallium/drivers/r600/r600_draw.c +++ b/src/gallium/drivers/r600/r600_draw.c @@ -170,6 +170,10 @@ static int r600_draw_common(struct r600_draw *draw) return r; /* FIXME */ r = radeon_ctx_set_draw_new(rctx->ctx, rctx->draw); + if (r == -EBUSY) { + r600_flush(draw->ctx, 0, NULL); + r = radeon_ctx_set_draw_new(rctx->ctx, rctx->draw); + } if (r) return r; rctx->draw = radeon_draw_duplicate(rctx->draw); diff --git a/src/gallium/drivers/r600/r600_helper.c b/src/gallium/drivers/r600/r600_helper.c index 132abf90a3..c672fe7386 100644 --- a/src/gallium/drivers/r600/r600_helper.c +++ b/src/gallium/drivers/r600/r600_helper.c @@ -61,6 +61,12 @@ int r600_conv_pipe_format(unsigned pformat, unsigned *format) case PIPE_FORMAT_I8_UNORM: *format = V_0280A0_COLOR_8; return 0; + case PIPE_FORMAT_B4G4R4A4_UNORM: + *format = V_0280A0_COLOR_4_4_4_4; + return 0; + case PIPE_FORMAT_B5G6R5_UNORM: + *format = V_0280A0_COLOR_5_6_5; + return 0; case PIPE_FORMAT_L16_UNORM: case PIPE_FORMAT_Z16_UNORM: case PIPE_FORMAT_Z32_UNORM: -- cgit v1.2.3 From dd863bf5e7c6680075cf7c355a026b1da69ee9e3 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Tue, 3 Aug 2010 17:45:51 -0400 Subject: r600g: add polygon offset support Signed-off-by: Jerome Glisse --- 
src/gallium/drivers/r600/r600_shader.c | 2 +- src/gallium/drivers/r600/r600_state.c | 44 +++++++++++++++++++++++++++++----- src/gallium/drivers/r600/r600d.h | 23 ++++++++++++++++++ 3 files changed, 62 insertions(+), 7 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 7d304f5ae8..43b3e40fad 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1142,7 +1142,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_slt}, {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index f0abafd6c2..72c0ac5dd1 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -765,8 +765,37 @@ static struct radeon_state *r600_db(struct r600_context *rctx) static struct radeon_state *r600_rasterizer(struct r600_context *rctx) { const struct pipe_rasterizer_state *state = &rctx->rasterizer->state.rasterizer; + const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; struct r600_screen *rscreen = rctx->screen; struct radeon_state *rstate; + float offset_units = 0, offset_scale = 0; + char depth = 0; + unsigned offset_db_fmt_cntl = 0; + + if (fb->zsbuf) { + offset_units = state->offset_units; + offset_scale = state->offset_scale * 12.0f; + switch (fb->zsbuf->texture->format) 
{ + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + depth = -24; + offset_units *= 2.0f; + break; + case PIPE_FORMAT_Z32_FLOAT: + depth = -23; + offset_units *= 1.0f; + offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1); + break; + case PIPE_FORMAT_Z16_UNORM: + depth = -16; + offset_units *= 4.0f; + break; + default: + R600_ERR("unsupported %d\n", fb->zsbuf->texture->format); + return NULL; + } + } + offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth); rctx->flat_shade = state->flatshade; rstate = radeon_state(rscreen->rw, R600_RASTERIZER_TYPE, R600_RASTERIZER); @@ -777,7 +806,10 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = 0x00080000 | S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | - S_028814_FACE(!state->front_ccw); + S_028814_FACE(!state->front_ccw) | + S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | + S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | + S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri); rstate->states[R600_RASTERIZER__PA_CL_VS_OUT_CNTL] = 0x00000000; rstate->states[R600_RASTERIZER__PA_CL_NANINF_CNTL] = 0x00000000; rstate->states[R600_RASTERIZER__PA_SU_POINT_SIZE] = 0x00080008; @@ -790,12 +822,12 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ] = 0x3F800000; rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ] = 0x3F800000; rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ] = 0x3F800000; - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_DB_FMT_CNTL] = 0x00000000; + rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_DB_FMT_CNTL] = offset_db_fmt_cntl; rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_CLAMP] = 0x00000000; - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_SCALE] = 0x00000000; - 
rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET] = 0x00000000; - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE] = 0x00000000; - rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET] = 0x00000000; + rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_SCALE] = fui(offset_scale); + rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET] = fui(offset_units); + rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE] = fui(offset_scale); + rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET] = fui(offset_units); if (radeon_state_pm4(rstate)) { radeon_state_decref(rstate); return NULL; diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 6085596ea8..24cb8593f7 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -539,6 +539,29 @@ #define S_028D10_IGNORE_SC_ZRANGE(x) (((x) & 0x1) << 17) #define G_028D10_IGNORE_SC_ZRANGE(x) (((x) >> 17) & 0x1) #define C_028D10_IGNORE_SC_ZRANGE 0xFFFDFFFF +#define R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL 0x028DF8 +#define S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(x) (((x) & 0xFF) << 0) +#define G_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(x) (((x) >> 0) & 0xFF) +#define C_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS 0xFFFFFF00 +#define S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(x) (((x) & 0x1) << 8) +#define G_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(x) (((x) >> 8) & 0x1) +#define C_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT 0xFFFFFEFF +#define R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE 0x028E00 +#define S_028E00_SCALE(x) (((x) & 0xFFFFFFFF) << 0) +#define G_028E00_SCALE(x) (((x) >> 0) & 0xFFFFFFFF) +#define C_028E00_SCALE 0x00000000 +#define R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET 0x028E04 +#define S_028E04_OFFSET(x) (((x) & 0xFFFFFFFF) << 0) +#define G_028E04_OFFSET(x) (((x) >> 0) & 0xFFFFFFFF) +#define C_028E04_OFFSET 0x00000000 +#define R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE 0x028E08 +#define S_028E08_SCALE(x) (((x) & 0xFFFFFFFF) << 0) +#define 
G_028E08_SCALE(x) (((x) >> 0) & 0xFFFFFFFF) +#define C_028E08_SCALE 0x00000000 +#define R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET 0x028E0C +#define S_028E0C_OFFSET(x) (((x) & 0xFFFFFFFF) << 0) +#define G_028E0C_OFFSET(x) (((x) >> 0) & 0xFFFFFFFF) +#define C_028E0C_OFFSET 0x00000000 #define R_028A40_VGT_GS_MODE 0x028A40 #define S_028A40_MODE(x) (((x) & 0x3) << 0) #define G_028A40_MODE(x) (((x) >> 0) & 0x3) -- cgit v1.2.3 From afbf446de5ad91743e2e2a76799bd87f3eed5f9a Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 31 Jul 2010 02:19:27 +0200 Subject: st/dri: fix crash when dri2_drawable_get_buffers fails It's easily reproducible with Compiz with its Resize window mode set to Normal (which is usually not the default mode). https://bugs.freedesktop.org/show_bug.cgi?id=28658 https://bugs.freedesktop.org/show_bug.cgi?id=29303 This is actually a workaround to prevent Compiz crashes. Instead, a completely white titlebar might show up during resizing transparent windows (a rare case). The underlying cause should be fixed by someone who has more knowledge about the code. 
(dri2_drawable_get_buffers should not return NULL) Acked-By: Jakob Bornecrantz --- src/gallium/state_trackers/dri/drm/dri2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c index 88ffa1e89d..1fb8996337 100644 --- a/src/gallium/state_trackers/dri/drm/dri2.c +++ b/src/gallium/state_trackers/dri/drm/dri2.c @@ -351,7 +351,8 @@ dri2_allocate_textures(struct dri_drawable *drawable, unsigned num_buffers = count; buffers = dri2_drawable_get_buffers(drawable, statts, &num_buffers); - dri2_drawable_process_buffers(drawable, buffers, num_buffers); + if (buffers) + dri2_drawable_process_buffers(drawable, buffers, num_buffers); } static void -- cgit v1.2.3 From f94e7e25d101a8691a5276a5aa0eb97d2f4eebbc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 3 Aug 2010 14:12:47 -0600 Subject: draw: add assertion, rearrange debug code --- src/gallium/auxiliary/draw/draw_pt.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index adef26a167..0a41328642 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -342,19 +342,22 @@ draw_arrays_instanced(struct draw_context *draw, unsigned reduced_prim = u_reduced_prim(mode); unsigned instance; + assert(instanceCount > 0); + if (reduced_prim != draw->reduced_prim) { draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); draw->reduced_prim = reduced_prim; } if (0) - draw_print_arrays(draw, mode, start, MIN2(count, 20)); - - if (0) { - unsigned int i; debug_printf("draw_arrays(mode=%u start=%u count=%u):\n", mode, start, count); + + if (0) tgsi_dump(draw->vs.vertex_shader->state.tokens, 0); + + if (0) { + unsigned int i; debug_printf("Elements:\n"); for (i = 0; i < draw->pt.nr_vertex_elements; i++) { debug_printf(" %u: src_offset=%u inst_div=%u 
vbuf=%u format=%s\n", @@ -375,6 +378,9 @@ draw_arrays_instanced(struct draw_context *draw, } } + if (0) + draw_print_arrays(draw, mode, start, MIN2(count, 20)); + for (instance = 0; instance < instanceCount; instance++) { draw->instance_id = instance + startInstance; draw_pt_arrays(draw, mode, start, count); -- cgit v1.2.3 From 0e7b53c75be6051b2a935e65f4dbc02449714ee1 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 3 Aug 2010 14:13:13 -0600 Subject: draw: use instance divisor in draw_print_arrays() --- src/gallium/auxiliary/draw/draw_pt.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 0a41328642..248927505d 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -259,6 +259,11 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count) for (j = 0; j < draw->pt.nr_vertex_elements; j++) { uint buf = draw->pt.vertex_element[j].vertex_buffer_index; ubyte *ptr = (ubyte *) draw->pt.user.vbuffer[buf]; + + if (draw->pt.vertex_element[j].instance_divisor) { + ii = draw->instance_id / draw->pt.vertex_element[j].instance_divisor; + } + ptr += draw->pt.vertex_buffer[buf].buffer_offset; ptr += draw->pt.vertex_buffer[buf].stride * ii; ptr += draw->pt.vertex_element[j].src_offset; -- cgit v1.2.3 From 8c3cc83432bbbc190b0f71f87e3e9583ae11f0b2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 3 Aug 2010 16:21:57 -0600 Subject: draw: added a comment --- src/gallium/auxiliary/draw/draw_pt_emit.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index 0229bcc7fe..5568fbb9f8 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -182,6 +182,7 @@ void draw_pt_emit( struct pt_emit *emit, 0, ~0); + /* fetch/translate vertex attribs to fill 
hw_verts[] */ translate->run( translate, 0, vertex_count, -- cgit v1.2.3 From e89e47e6d03f0c8b421268481d077100c2247253 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 3 Aug 2010 16:30:40 -0600 Subject: gallium/translate: make generic_run() and generic_run_elts() more alike Plus more debug code and do clamping in generic_run(). --- .../auxiliary/translate/translate_generic.c | 63 +++++++++++++++------- 1 file changed, 44 insertions(+), 19 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 0e43a512ee..4d1977229e 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -368,23 +368,23 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, /* loop over vertex attributes (vertex shader inputs) */ for (i = 0; i < count; i++) { - unsigned elt = *elts++; + const unsigned elt = *elts++; for (attr = 0; attr < nr_attrs; attr++) { float data[4]; - const uint8_t *src; - unsigned index; - - char *dst = (vert + - tg->attrib[attr].output_offset); + char *dst = vert + tg->attrib[attr].output_offset; if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) { + const uint8_t *src; + unsigned index; + if (tg->attrib[attr].instance_divisor) { index = instance_id / tg->attrib[attr].instance_divisor; } else { index = elt; } + /* clamp to void going out of bounds */ index = MIN2(index, tg->attrib[attr].max_index); src = tg->attrib[attr].input_ptr + @@ -392,11 +392,23 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, tg->attrib[attr].fetch( data, src, 0, 0 ); + if (0) + debug_printf("Fetch elt attr %d from %p stride %d div %u max %u index %d: " + " %f, %f, %f, %f \n", + attr, + tg->attrib[attr].input_ptr, + tg->attrib[attr].input_stride, + tg->attrib[attr].instance_divisor, + tg->attrib[attr].max_index, + index, + data[0], data[1],data[2], data[3]); } else { data[0] 
= (float)instance_id; } - if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n", - i, elt, attr, data[0], data[1], data[2], data[3]); + + if (0) + debug_printf("vert %d/%d attr %d: %f %f %f %f\n", + i, elt, attr, data[0], data[1], data[2], data[3]); tg->attrib[attr].emit( data, dst ); } @@ -425,29 +437,42 @@ static void PIPE_CDECL generic_run( struct translate *translate, for (attr = 0; attr < nr_attrs; attr++) { float data[4]; - - char *dst = (vert + - tg->attrib[attr].output_offset); + char *dst = vert + tg->attrib[attr].output_offset; if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) { const uint8_t *src; + unsigned index; if (tg->attrib[attr].instance_divisor) { - src = tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * - (instance_id / tg->attrib[attr].instance_divisor); - } else { - src = tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * elt; + index = instance_id / tg->attrib[attr].instance_divisor; } + else { + index = elt; + } + + /* clamp to void going out of bounds */ + index = MIN2(index, tg->attrib[attr].max_index); + + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * index; tg->attrib[attr].fetch( data, src, 0, 0 ); + + if (0) + debug_printf("Fetch linear attr %d from %p stride %d index %d: " + " %f, %f, %f, %f \n", + attr, + tg->attrib[attr].input_ptr, + tg->attrib[attr].input_stride, + index, + data[0], data[1],data[2], data[3]); } else { data[0] = (float)instance_id; } - if (0) debug_printf("vert %d attr %d: %f %f %f %f\n", - i, attr, data[0], data[1], data[2], data[3]); + if (0) + debug_printf("vert %d attr %d: %f %f %f %f\n", + i, attr, data[0], data[1], data[2], data[3]); tg->attrib[attr].emit( data, dst ); } -- cgit v1.2.3 From 48268e0f2a5e65b63586398db3a58523a8c7a7a0 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 3 Aug 2010 16:32:32 -0600 Subject: draw: check for instance divisors in vcache_check_run() When we have instance divisors we don't really know which vertex elements we'll be 
fetching ahead of time. This fixes a bug in instanced drawing which was exposed by the new draw_vbo() code because of max_index not being ~0 as often as it used to be. The test for max_index >= DRAW_PIPE_MAX_VERTICES often hid this problem before. --- src/gallium/auxiliary/draw/draw_pt_vcache.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 8ef94c3163..d2fa1c6d4e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -339,6 +339,25 @@ format_from_get_elt( pt_elt_func get_elt ) #endif +/** + * Check if any vertex attributes use instance divisors. + * Note that instance divisors complicate vertex fetching so we need + * to take the vcache path when they're in use. + */ +static boolean +any_instance_divisors(const struct draw_context *draw) +{ + uint i; + + for (i = 0; i < draw->pt.nr_vertex_elements; i++) { + uint div = draw->pt.vertex_element[i].instance_divisor; + if (div) + return TRUE; + } + return FALSE; +} + + static INLINE void vcache_check_run( struct draw_pt_front_end *frontend, pt_elt_func get_elt, @@ -382,6 +401,9 @@ vcache_check_run( struct draw_pt_front_end *frontend, if (max_index >= (unsigned) DRAW_PIPE_MAX_VERTICES) goto fail; + if (any_instance_divisors(draw)) + goto fail; + fetch_count = max_index + 1 - min_index; if (0) -- cgit v1.2.3 From 8bc5fe1ad67127f642c47da0a307aa7bd8696fab Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 4 Aug 2010 09:36:17 +1000 Subject: r300g: disable multisample visuals until the state tracker bits catch up. This stops us advertising lots of ms visuals we can't actually use. 
Signed-off-by: Dave Airlie --- src/gallium/drivers/r300/r300_screen.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 676430f5fe..6268001054 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -279,11 +279,14 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, case 3: case 4: case 6: + return FALSE; +#if 0 if (usage != PIPE_BIND_RENDER_TARGET || !util_format_is_rgba8_variant( util_format_description(format))) { return FALSE; } +#endif break; default: return FALSE; -- cgit v1.2.3 From 2824d5687a19e42ba0da8fd08e80610c4469a3b3 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 3 Aug 2010 15:23:23 -0700 Subject: r300/compiler: r500 hw support for break and continue in loops. The BGNLOOP and ENDLOOP instructions are now being used correctly, which makes break and continue possible. The deadcode pass has been modified to handle breaks, and the compiler is more careful about which loops are unrolled. 
--- src/gallium/drivers/r300/r300_fs.c | 9 +- src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 8 +- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 9 +- src/mesa/drivers/dri/r300/compiler/r500_fragprog.c | 25 --- src/mesa/drivers/dri/r300/compiler/r500_fragprog.h | 2 - .../drivers/dri/r300/compiler/r500_fragprog_emit.c | 114 ++++++---- src/mesa/drivers/dri/r300/compiler/radeon_code.h | 3 + .../dri/r300/compiler/radeon_dataflow_deadcode.c | 39 +++- .../dri/r300/compiler/radeon_emulate_loops.c | 237 +++++++++++++-------- .../dri/r300/compiler/radeon_emulate_loops.h | 9 +- .../drivers/dri/r300/compiler/radeon_optimize.c | 3 +- 11 files changed, 289 insertions(+), 169 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index db5269912e..87ff49a90c 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -248,13 +248,18 @@ static void r300_emit_fs_code_to_buffer( shader->cb_code_size = 19 + ((code->inst_end + 1) * 6) + - imm_count * 7; + imm_count * 7 + + code->int_constant_count * 2; NEW_CB(shader->cb_code, shader->cb_code_size); OUT_CB_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); OUT_CB_REG(R500_US_PIXSIZE, code->max_temp_idx); OUT_CB_REG(R500_US_FC_CTRL, code->us_fc_ctrl); - OUT_CB_REG(R500_US_CODE_RANGE, + for(i = 0; i < code->int_constant_count; i++){ + OUT_CB_REG(R500_US_FC_INT_CONST_0 + (i * 4), + code->int_constants[i]); + } + OUT_CB_REG(R500_US_CODE_RANGE, R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end)); OUT_CB_REG(R500_US_CODE_OFFSET, 0); OUT_CB_REG(R500_US_CODE_ADDR, diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index a326ee4c4f..070939497c 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -109,13 +109,13 @@ void r3xx_compile_fragment_program(struct 
r300_fragment_program_compiler* c) debug_program_log(c, "before compilation"); if (c->Base.is_r500){ - r500_transform_unroll_loops(&c->Base, &loop_state); - debug_program_log(c, "after r500 transform loops"); + rc_unroll_loops(&c->Base, R500_PFS_MAX_INST); + debug_program_log(c, "after unroll loops"); } else{ - rc_transform_unroll_loops(&c->Base, &loop_state); + rc_transform_loops(&c->Base, &loop_state, R300_PFS_MAX_ALU_INST); debug_program_log(c, "after transform loops"); - + rc_emulate_branches(&c->Base); debug_program_log(c, "after emulate branches"); } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index d347b4df9c..fe34ff67cd 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -633,7 +633,7 @@ static struct rc_swizzle_caps r300_vertprog_swizzle_caps = { void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) { struct emulate_loop_state loop_state; - + compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; addArtificialOutputs(compiler); @@ -643,14 +643,13 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) /* XXX Ideally this should be done only for r3xx, but since * we don't have branching support for r5xx, we use the emulation * on all chipsets. 
*/ - rc_transform_unroll_loops(&compiler->Base, &loop_state); - - debug_program_log(compiler, "after transform loops"); - + if (compiler->Base.is_r500){ + rc_transform_loops(&compiler->Base, &loop_state, R500_VS_MAX_ALU); rc_emulate_loops(&loop_state, R500_VS_MAX_ALU); } else { rc_emulate_loops(&loop_state, R300_VS_MAX_ALU); + rc_transform_loops(&compiler->Base, &loop_state, R300_VS_MAX_ALU); } debug_program_log(compiler, "after emulate loops"); diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index e6b5522c5b..95be619d5d 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -60,31 +60,6 @@ int r500_transform_IF( return 1; } -/** - * Rewrite loops to make them easier to emit. This is not a local - * transformation, because it modifies and reorders an entire block of code. - */ -void r500_transform_unroll_loops(struct radeon_compiler * c, - struct emulate_loop_state *s) -{ - int i; - - rc_transform_unroll_loops(c, s); - - for( i = s->LoopCount - 1; i >= 0; i-- ){ - struct rc_instruction * inst_continue; - if(!s->Loops[i].EndLoop){ - continue; - } - /* Insert a continue instruction at the end of the loop. This - * is required in order to emit loops correctly. 
*/ - inst_continue = rc_insert_new_instruction(c, - s->Loops[i].EndIf->Prev); - inst_continue->U.I.Opcode = RC_OPCODE_CONTINUE; - } - -} - static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) { unsigned int relevant; diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h index 0d005a794f..34173351f8 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h @@ -49,6 +49,4 @@ extern int r500_transform_IF( struct rc_instruction * inst, void* data); -void r500_transform_unroll_loops(struct radeon_compiler * c, - struct emulate_loop_state * s); #endif diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index 0bd8f0a239..c3f817ad4e 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -64,7 +64,12 @@ struct branch_info { }; struct loop_info { - int LoopStart; + int BgnLoop; + + int BranchDepth; + int * Brks; + int BrkCount; + int BrkReserved; }; struct emit_state { @@ -368,6 +373,12 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst unsigned int newip = ++s->Code->inst_end; + /* Currently all loops use the same integer constant to intialize + * the loop variables. 
*/ + if(!s->Code->int_constants[0]) { + s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff); + s->Code->int_constant_count = 1; + } s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT; switch(inst->U.I.Opcode){ @@ -378,32 +389,69 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1); loop = &s->Loops[s->CurrentLoopDepth++]; - - /* We don't emit an instruction for BGNLOOP, so we need to - * decrement the instruction counter, but first we need to - * set LoopStart to the current value of inst_end, which - * will end up being the first real instruction in the loop.*/ - loop->LoopStart = s->Code->inst_end--; + memset(loop, 0, sizeof(struct loop_info)); + loop->BranchDepth = s->CurrentBranchDepth; + loop->BgnLoop = newip; + + s->Code->inst[newip].inst2 = R500_FC_OP_LOOP + | R500_FC_JUMP_FUNC(0x00) + | R500_FC_IGNORE_UNCOVERED + ; break; - case RC_OPCODE_BRK: - /* Don't emit an instruction for BRK */ - s->Code->inst_end--; + loop = &s->Loops[s->CurrentLoopDepth - 1]; + memory_pool_array_reserve(&s->C->Pool, int, loop->Brks, + loop->BrkCount, loop->BrkReserved, 1); + + loop->Brks[loop->BrkCount++] = newip; + s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_B_OP1_DECR + | R500_FC_B_POP_CNT( + s->CurrentBranchDepth - loop->BranchDepth) + | R500_FC_IGNORE_UNCOVERED + ; break; case RC_OPCODE_CONTINUE: loop = &s->Loops[s->CurrentLoopDepth - 1]; - s->Code->inst[newip].inst2 = R500_FC_OP_JUMP | - R500_FC_JUMP_FUNC(0xff); - s->Code->inst[newip].inst3 = R500_FC_JUMP_ADDR(loop->LoopStart); + s->Code->inst[newip].inst2 = R500_FC_OP_JUMP + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_B_OP1_DECR + | R500_FC_B_POP_CNT( + s->CurrentBranchDepth - loop->BranchDepth) + ; + s->Code->inst[newip].inst3 = R500_FC_JUMP_ADDR(loop->BgnLoop); break; case RC_OPCODE_ENDLOOP: - /* Don't emit an instruction for ENDLOOP */ - s->Code->inst_end--; + { + unsigned int i; + 
loop = &s->Loops[s->CurrentLoopDepth - 1]; + /* Emit ENDLOOP */ + s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_JUMP_ANY + | R500_FC_IGNORE_UNCOVERED + ; + /* The constant integer at index 0 is used by all loops. */ + s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0) + | R500_FC_JUMP_ADDR(loop->BgnLoop + 1) + ; + + /* Set jump address and int constant for BGNLOOP */ + s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0) + | R500_FC_JUMP_ADDR(newip) + ; + + /* Set jump address for the BRK instructions. */ + while(loop->BrkCount--) { + s->Code->inst[loop->Brks[loop->BrkCount]].inst3 = + R500_FC_JUMP_ADDR(newip + 1); + } s->CurrentLoopDepth--; break; - + } case RC_OPCODE_IF: if ( s->CurrentBranchDepth >= MAX_BRANCH_DEPTH_FULL) { rc_error(s->C, "Branch depth exceeds hardware limit"); @@ -442,24 +490,16 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst } branch = &s->Branches[s->CurrentBranchDepth - 1]; - - if(inst->Prev->U.I.Opcode == RC_OPCODE_BRK){ - branch->Endif = --s->Code->inst_end; - s->Code->inst[branch->Endif].inst2 |= - R500_FC_B_OP0_DECR; - } - else{ - branch->Endif = newip; - - s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP - | R500_FC_A_OP_NONE /* no address stack */ - | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */ - | R500_FC_B_OP0_DECR /* decrement branch counter if stay */ - | R500_FC_B_OP1_NONE /* no branch counter if stay */ - | R500_FC_B_POP_CNT(1) + branch->Endif = newip; + + s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */ + | R500_FC_B_OP0_DECR /* decrement branch counter if stay */ + | R500_FC_B_OP1_NONE /* no branch counter if stay */ + | R500_FC_B_POP_CNT(1) ; - s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); - } + s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); 
s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP | R500_FC_A_OP_NONE /* no address stack */ | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */ @@ -544,11 +584,9 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; } - /* Use FULL flow control mode if branches are nested deep enough. - * We don not need to enable FULL flow control mode for loops, becasue - * we aren't using the hardware loop instructions. - */ - if (s.MaxBranchDepth >= 4) { + /* Enable full flow control mode if we are using loops or have if + * statements nested at least four deep. */ + if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) { if (code->max_temp_idx < 1) code->max_temp_idx = 1; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index d03689763b..e14a3520dd 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -221,6 +221,9 @@ struct r500_fragment_program_code { int max_temp_idx; uint32_t us_fc_ctrl; + + uint32_t int_constants[32]; + uint32_t int_constant_count; }; struct rX00_fragment_program_code { diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c index fbb4235c22..31566a937f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c @@ -43,6 +43,12 @@ struct instruction_state { unsigned char SrcReg[3]; }; +struct loopinfo { + struct updatemask_state * Breaks; + unsigned int BreakCount; + unsigned int BreaksReserved; +}; + struct branchinfo { unsigned int HaveElse:1; @@ -59,6 +65,10 @@ struct deadcode_state { struct branchinfo * BranchStack; unsigned int BranchStackSize; unsigned int BranchStackReserved; + + struct loopinfo * LoopStack; + unsigned int LoopStackSize; + unsigned int 
LoopStackReserved; }; @@ -78,6 +88,22 @@ static void or_updatemasks( dst->Address = a->Address | b->Address; } +static void push_break(struct deadcode_state *s) +{ + struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1]; + memory_pool_array_reserve(&s->C->Pool, struct updatemask_state, + loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1); + + memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R)); +} + +static void push_loop(struct deadcode_state * s) +{ + memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack, + s->LoopStackSize, s->LoopStackReserved, 1); + memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo)); +} + static void push_branch(struct deadcode_state * s) { memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack, @@ -233,11 +259,22 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f } } } + push_loop(&s); break; } - case RC_OPCODE_CONTINUE: case RC_OPCODE_BRK: + push_break(&s); + break; case RC_OPCODE_BGNLOOP: + { + unsigned int i; + struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1]; + for(i = 0; i < loop->BreakCount; i++) { + or_updatemasks(&s.R, &s.R, &loop->Breaks[i]); + } + break; + } + case RC_OPCODE_CONTINUE: break; case RC_OPCODE_ENDIF: push_branch(&s); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c index fed4d8829a..94e3e5f4f5 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c @@ -39,7 +39,6 @@ #define DBG(...) 
do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) struct const_value { - struct radeon_compiler * C; struct rc_src_register * Src; float Value; @@ -78,17 +77,17 @@ static int src_reg_is_immediate(struct rc_src_register * src, c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE; } -static unsigned int loop_calc_iterations(struct emulate_loop_state *s, - struct loop_info * loop, unsigned int max_instructions) +static unsigned int loop_max_possible_iterations(struct radeon_compiler *c, + struct loop_info * loop, unsigned int prog_inst_limit) { - unsigned int total_i = rc_recompute_ips(s->C); + unsigned int total_i = rc_recompute_ips(c); unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1; /* +1 because the program already has one iteration of the loop. */ - return 1 + ((max_instructions - total_i) / (s->LoopCount * loop_i)); + return 1 + ((prog_inst_limit - total_i) / loop_i); } -static void loop_unroll(struct emulate_loop_state * s, - struct loop_info *loop, unsigned int iterations) +static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop, + unsigned int iterations) { unsigned int i; struct rc_instruction * ptr; @@ -99,7 +98,7 @@ static void loop_unroll(struct emulate_loop_state * s, rc_remove_instruction(loop->EndLoop); for( i = 1; i < iterations; i++){ for(ptr = first; ptr != last->Next; ptr = ptr->Next){ - struct rc_instruction *new = rc_alloc_instruction(s->C); + struct rc_instruction *new = rc_alloc_instruction(c); memcpy(new, ptr, sizeof(struct rc_instruction)); rc_insert_instruction(append_to, new); append_to = new; @@ -115,7 +114,7 @@ static void update_const_value(void * data, struct rc_instruction * inst, if(value->Src->File != file || value->Src->Index != index || !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){ - return; + return; } switch(inst->U.I.Opcode){ case RC_OPCODE_MOV: @@ -140,7 +139,7 @@ static void get_incr_amount(void * data, struct rc_instruction * inst, if(file != 
RC_FILE_TEMPORARY || count_inst->Index != index || (1 << GET_SWZ(count_inst->Swz,0) != mask)){ - return; + return; } /* Find the index of the counter register. */ opcode = rc_get_opcode_info(inst->U.I.Opcode); @@ -185,11 +184,10 @@ static void get_incr_amount(void * data, struct rc_instruction * inst, count_inst->Unknown = 1; return; } - } -static int transform_const_loop(struct emulate_loop_state * s, - struct loop_info * loop) +static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop, + unsigned int prog_inst_limit) { int end_loops; int iterations; @@ -201,12 +199,12 @@ static int transform_const_loop(struct emulate_loop_state * s, struct rc_instruction * inst; /* Find the counter and the upper limit */ - - if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], s->C)){ + + if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], c)){ limit = &loop->Cond->U.I.SrcReg[0]; counter = &loop->Cond->U.I.SrcReg[1]; } - else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], s->C)){ + else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], c)){ limit = &loop->Cond->U.I.SrcReg[1]; counter = &loop->Cond->U.I.SrcReg[0]; } @@ -214,13 +212,13 @@ static int transform_const_loop(struct emulate_loop_state * s, DBG("No constant limit.\n"); return 0; } - + /* Find the initial value of the counter */ counter_value.Src = counter; counter_value.Value = 0.0f; counter_value.HasValue = 0; - counter_value.C = s->C; - for(inst = s->C->Program.Instructions.Next; inst != loop->BeginLoop; + counter_value.C = c; + for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop; inst = inst->Next){ rc_for_all_writes_mask(inst, update_const_value, &counter_value); } @@ -230,7 +228,7 @@ static int transform_const_loop(struct emulate_loop_state * s, } DBG("Initial counter value is %f\n", counter_value.Value); /* Determine how the counter is modified each loop */ - count_inst.C = s->C; + count_inst.C = c; count_inst.Index = counter->Index; count_inst.Swz = counter->Swizzle; 
count_inst.Amount = 0.0f; @@ -277,17 +275,20 @@ static int transform_const_loop(struct emulate_loop_state * s, /* Calculate the number of iterations of this loop. Keeping this * simple, since we only support increment and decrement loops. */ - limit_value = get_constant_value(s->C, limit, 0); + limit_value = get_constant_value(c, limit, 0); DBG("Limit is %f.\n", limit_value); + /* The iteration calculations are opposite of what you would expect. + * In a normal loop, if the condition is met, then loop continues, but + * with our loops, if the condition is met, the is exited. */ switch(loop->Cond->U.I.Opcode){ - case RC_OPCODE_SGT: - case RC_OPCODE_SLT: + case RC_OPCODE_SGE: + case RC_OPCODE_SLE: iterations = (int) ceilf((limit_value - counter_value.Value) / count_inst.Amount); break; - case RC_OPCODE_SLE: - case RC_OPCODE_SGE: + case RC_OPCODE_SGT: + case RC_OPCODE_SLT: iterations = (int) floorf((limit_value - counter_value.Value) / count_inst.Amount) + 1; break; @@ -295,77 +296,84 @@ static int transform_const_loop(struct emulate_loop_state * s, return 0; } + if (iterations > loop_max_possible_iterations(c, loop, + prog_inst_limit)) { + return 0; + } + DBG("Loop will have %d iterations.\n", iterations); - + /* Prepare loop for unrolling */ rc_remove_instruction(loop->Cond); rc_remove_instruction(loop->If); rc_remove_instruction(loop->Brk); rc_remove_instruction(loop->EndIf); - - loop_unroll(s, loop, iterations); + + unroll_loop(c, loop, iterations); loop->EndLoop = NULL; return 1; } -/** - * This function prepares a loop to be unrolled by converting it into an if - * statement. Here is an outline of the conversion process: - * BGNLOOP; -> BGNLOOP; - * -> - * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2]; - * IF temp[0]; -> IF temp[0]; - * BRK; -> - * ENDIF; -> - * -> ENDIF; - * ENDLOOP; -> ENDLOOP - * +/** + * @param c + * @param loop * @param inst A pointer to a BGNLOOP instruction. 
- * @return If the loop can be unrolled, a pointer to the first instruction of - * the unrolled loop. - * Otherwise, A pointer to the ENDLOOP instruction. - * Null if there is an error. + * @return 1 if all of the members of loop where set. + * @return 0 if there was an error and some members of loop are still NULL. */ -static struct rc_instruction * transform_loop(struct emulate_loop_state * s, +static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop, struct rc_instruction * inst) { - struct loop_info *loop; struct rc_instruction * ptr; - memory_pool_array_reserve(&s->C->Pool, struct loop_info, - s->Loops, s->LoopCount, s->LoopReserved, 1); - - loop = &s->Loops[s->LoopCount++]; - memset(loop, 0, sizeof(struct loop_info)); if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){ - rc_error(s->C, "expected BGNLOOP\n", __FUNCTION__); - return NULL; + rc_error(c, "%s: expected BGNLOOP", __FUNCTION__); + return 0; } + + memset(loop, 0, sizeof(struct loop_info)); + loop->BeginLoop = inst; - for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next){ + for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) { + + if (ptr == &c->Program.Instructions) { + rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n", + __FUNCTION__); + return 0; + } + switch(ptr->U.I.Opcode){ case RC_OPCODE_BGNLOOP: - /* Nested loop */ - ptr = transform_loop(s, ptr); - if(!ptr){ - return NULL; + { + /* Nested loop, skip ahead to the end. 
*/ + unsigned int loop_depth = 1; + for(ptr = ptr->Next; ptr != &c->Program.Instructions; + ptr = ptr->Next){ + if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) { + loop_depth++; + } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) { + if (!--loop_depth) { + break; + } + } + } + if (ptr == &c->Program.Instructions) { + rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n", + __FUNCTION__); + return 0; } break; + } case RC_OPCODE_BRK: - loop->Brk = ptr; - if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF){ - rc_error(s->C, - "%s: expected ENDIF\n",__FUNCTION__); - return NULL; - } - loop->EndIf = ptr->Next; - if(ptr->Prev->U.I.Opcode != RC_OPCODE_IF){ - rc_error(s->C, - "%s: expected IF\n", __FUNCTION__); - return NULL; + if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF + || ptr->Prev->U.I.Opcode != RC_OPCODE_IF + || loop->Brk){ + continue; } + loop->Brk = ptr; loop->If = ptr->Prev; + loop->EndIf = ptr->Next; switch(loop->If->Prev->U.I.Opcode){ case RC_OPCODE_SLT: case RC_OPCODE_SGE: @@ -375,18 +383,62 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s, case RC_OPCODE_SNE: break; default: - rc_error(s->C, "%s expected conditional\n", + rc_error(c, "%s: expected conditional", __FUNCTION__); - return NULL; + return 0; } loop->Cond = loop->If->Prev; - ptr = loop->EndIf; break; + case RC_OPCODE_ENDLOOP: loop->EndLoop = ptr; break; } } + + if (loop->BeginLoop && loop->Brk && loop->If && loop->EndIf + && loop->Cond && loop->EndLoop) { + return 1; + } + return 0; +} + +/** + * This function prepares a loop to be unrolled by converting it into an if + * statement. Here is an outline of the conversion process: + * BGNLOOP; -> BGNLOOP; + * -> + * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2]; + * IF temp[0]; -> IF temp[0]; + * BRK; -> + * ENDIF; -> + * -> ENDIF; + * ENDLOOP; -> ENDLOOP + * + * @param inst A pointer to a BGNLOOP instruction. + * @return If the loop can be unrolled, a pointer to the first instruction of + * the unrolled loop. 
+ * Otherwise, A pointer to the ENDLOOP instruction. + * Null if there is an error. + */ +static struct rc_instruction * transform_loop(struct emulate_loop_state * s, + struct rc_instruction * inst, + int prog_inst_limit) +{ + struct loop_info * loop; + + memory_pool_array_reserve(&s->C->Pool, struct loop_info, + s->Loops, s->LoopCount, s->LoopReserved, 1); + + loop = &s->Loops[s->LoopCount++]; + + if (!build_loop_info(s->C, loop, inst)) + return NULL; + + if(try_unroll_loop(s->C, loop, prog_inst_limit)){ + return loop->BeginLoop->Next; + } + /* Reverse the conditional instruction */ switch(loop->Cond->U.I.Opcode){ case RC_OPCODE_SGE: @@ -411,31 +463,27 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s, rc_error(s->C, "loop->Cond is not a conditional.\n"); return NULL; } - - /* Check if the number of loops is known at compile time. */ - if(transform_const_loop(s, loop)){ - return loop->BeginLoop->Next; - } - /* Prepare the loop to be unrolled */ + /* Prepare the loop to be emulated */ rc_remove_instruction(loop->Brk); rc_remove_instruction(loop->EndIf); rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf); return loop->EndLoop; } -void rc_transform_unroll_loops(struct radeon_compiler *c, - struct emulate_loop_state * s) +void rc_transform_loops(struct radeon_compiler *c, + struct emulate_loop_state * s, + int prog_inst_limit) { struct rc_instruction * ptr; - + memset(s, 0, sizeof(struct emulate_loop_state)); s->C = c; ptr = s->C->Program.Instructions.Next; while(ptr != &s->C->Program.Instructions) { if(ptr->Type == RC_INSTRUCTION_NORMAL && ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ - ptr = transform_loop(s, ptr); + ptr = transform_loop(s, ptr, prog_inst_limit); if(!ptr){ return; } @@ -444,8 +492,23 @@ void rc_transform_unroll_loops(struct radeon_compiler *c, } } -void rc_emulate_loops(struct emulate_loop_state *s, - unsigned int max_instructions) +void rc_unroll_loops(struct radeon_compiler *c, int prog_inst_limit) +{ + struct 
rc_instruction * inst; + struct loop_info loop; + + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + + if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { + if (build_loop_info(c, &loop, inst)) { + try_unroll_loop(c, &loop, prog_inst_limit); + } + } + } +} + +void rc_emulate_loops(struct emulate_loop_state *s, int prog_inst_limit) { int i; /* Iterate backwards of the list of loops so that loops that nested @@ -455,8 +518,8 @@ void rc_emulate_loops(struct emulate_loop_state *s, if(!s->Loops[i].EndLoop){ continue; } - unsigned int iterations = loop_calc_iterations(s, &s->Loops[i], - max_instructions); - loop_unroll(s, &s->Loops[i], iterations); + unsigned int iterations = loop_max_possible_iterations( + s->C, &s->Loops[i], prog_inst_limit); + unroll_loop(s->C, &s->Loops[i], iterations); } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h index 7748813c4e..339527ba3b 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h @@ -23,10 +23,11 @@ struct emulate_loop_state { unsigned int LoopReserved; }; -void rc_transform_unroll_loops(struct radeon_compiler *c, - struct emulate_loop_state * s); +void rc_transform_loops(struct radeon_compiler *c, + struct emulate_loop_state * s, int prog_inst_limit); -void rc_emulate_loops(struct emulate_loop_state *s, - unsigned int max_instructions); +void rc_unroll_loops(struct radeon_compiler * c, int prog_inst_limit); + +void rc_emulate_loops(struct emulate_loop_state * s, int prog_inst_limit); #endif /* RADEON_EMULATE_LOOPS_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index eca0651536..7a3f35950a 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -164,7 +164,8 @@ static void 
peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo inst = inst->Next) { /* XXX In the future we might be able to make the optimizer * smart enough to handle loops. */ - if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP){ + if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP + || inst->U.I.Opcode == RC_OPCODE_ENDLOOP){ return; } rc_for_all_reads_mask(inst, peephole_scan_read, &s); -- cgit v1.2.3 From 5c2f01bbb076af8b8ae6e1803d95a9ae678c2d1c Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Wed, 4 Aug 2010 14:00:04 +0800 Subject: draw: Fix the edge flags of flatshade_first polygons. This bug can be triggered by rendering polygons with glProvokingVertexEXT(GL_FIRST_VERTEX_CONVENTION_EXT); glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); --- src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h index dac68ad439..a42162691b 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h @@ -146,10 +146,10 @@ static void FUNC( struct draw_pt_front_end *frontend, if (flatfirst) { flags = (DRAW_PIPE_RESET_STIPPLE | - DRAW_PIPE_EDGE_FLAG_1 | - DRAW_PIPE_EDGE_FLAG_2); - edge_next = DRAW_PIPE_EDGE_FLAG_2; - edge_finish = DRAW_PIPE_EDGE_FLAG_0; + DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_1); + edge_next = DRAW_PIPE_EDGE_FLAG_1; + edge_finish = DRAW_PIPE_EDGE_FLAG_2; } else { flags = (DRAW_PIPE_RESET_STIPPLE | -- cgit v1.2.3 From 988e86762f07eb8b685eef6f3302566f604db2a6 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 4 Aug 2010 12:55:21 -0700 Subject: draw: Fix return type of draw_translate_vinfo_size. Fixes typo from commit b609cfc7c9c38f26e7e6d6f7dd5dd6d38f4ed209. 
--- src/gallium/auxiliary/draw/draw_vertex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h index 3af31ffe12..e63cf5f4f9 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.h +++ b/src/gallium/auxiliary/draw/draw_vertex.h @@ -166,7 +166,7 @@ static INLINE enum pipe_format draw_translate_vinfo_format(enum attrib_emit emit } } -static INLINE enum attrib_emit draw_translate_vinfo_size(enum attrib_emit emit) +static INLINE unsigned draw_translate_vinfo_size(enum attrib_emit emit) { switch (emit) { case EMIT_OMIT: -- cgit v1.2.3 From 9a78e790dc4c40362b971ad5eff2505c02b73ed7 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 4 Aug 2010 16:10:11 -0400 Subject: r600g: always perform texture perspective divide + fix blending quake3 engine seems to run fine at this point (ioquake) Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_shader.c | 121 +++++++++++++------------- src/gallium/drivers/r600/r600_state.c | 5 +- src/gallium/drivers/r600/r600_state_inlines.h | 2 +- src/gallium/drivers/r600/r600d.h | 25 ++++++ 4 files changed, 88 insertions(+), 65 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 43b3e40fad..8da102cde0 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -963,68 +963,66 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; /* Add perspective divide */ - if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_TXP) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE; - alu.src[0].sel = src_gpr; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 3; - alu.last = 1; - alu.dst.write = 1; - r = 
r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE; + alu.src[0].sel = src_gpr; + alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 3; + alu.last = 1; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 3; - alu.src[1].sel = src_gpr; - alu.src[1].chan = tgsi_chan(&inst->Src[0], 0); - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 0; - alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 3; - alu.src[1].sel = src_gpr; - alu.src[1].chan = tgsi_chan(&inst->Src[0], 1); - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 1; - alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 3; - alu.src[1].sel = src_gpr; - alu.src[1].chan = tgsi_chan(&inst->Src[0], 2); - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 2; - alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; - alu.src[0].sel = 249; - alu.src[0].chan = 0; - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 3; - alu.last = 1; - alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - src_gpr = ctx->temp_reg; - } + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 3; + alu.src[1].sel = src_gpr; + alu.src[1].chan = 
tgsi_chan(&inst->Src[0], 0); + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 0; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 3; + alu.src[1].sel = src_gpr; + alu.src[1].chan = tgsi_chan(&inst->Src[0], 1); + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 1; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 3; + alu.src[1].sel = src_gpr; + alu.src[1].chan = tgsi_chan(&inst->Src[0], 2); + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 2; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.src[0].sel = 249; + alu.src[0].chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 3; + alu.last = 1; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + src_gpr = ctx->temp_reg; /* TODO use temp if src_gpr is not a temporary reg (File != TEMPORARY) */ memset(&tex, 0, sizeof(struct r600_bc_tex)); @@ -1041,6 +1039,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.src_sel_y = 1; tex.src_sel_z = 2; tex.src_sel_w = 3; + if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { tex.coord_type_x = 1; tex.coord_type_y = 1; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 72c0ac5dd1..e8a591f73e 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -628,7 +628,6 @@ static struct radeon_state *r600_blend(struct r600_context *rctx) rstate->states[R600_BLEND__CB_BLEND_CONTROL] = 0x00000000; for (i = 0; i < 8; i++) { - unsigned eqRGB = state->rt[i].rgb_func; unsigned srcRGB = 
state->rt[i].rgb_src_factor; unsigned dstRGB = state->rt[i].rgb_dst_factor; @@ -1202,7 +1201,7 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) int i; target_mask = 0; - color_control = 0; + color_control = S_028808_PER_MRT_BLEND(1); if (pbs->logicop_enable) { color_control |= (pbs->logicop_func) << 16; @@ -1212,7 +1211,7 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) target_mask |= (pbs->rt[0].colormask); for (i = 0; i < 8; i++) { if (pbs->rt[i].blend_enable) { - color_control |= (1 << (8 + i)); + color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); target_mask |= (pbs->rt[0].colormask << (4 * i)); } } diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 26a5dd0432..321e75d7a1 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -48,7 +48,7 @@ static INLINE uint32_t r600_translate_blend_factor(int blend_fact) { switch (blend_fact) { case PIPE_BLENDFACTOR_ONE: - return V_028804_BLEND_ZERO; + return V_028804_BLEND_ONE; case PIPE_BLENDFACTOR_SRC_COLOR: return V_028804_BLEND_SRC_COLOR; case PIPE_BLENDFACTOR_SRC_ALPHA: diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 24cb8593f7..8205bdeadc 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -315,6 +315,31 @@ #define S_028800_STENCILZFAIL_BF(x) (((x) & 0x7) << 29) #define G_028800_STENCILZFAIL_BF(x) (((x) >> 29) & 0x7) #define C_028800_STENCILZFAIL_BF 0x1FFFFFFF +#define R_028808_CB_COLOR_CONTROL 0x028808 +#define S_028808_FOG_ENABLE(x) (((x) & 0x1) << 0) +#define G_028808_FOG_ENABLE(x) (((x) >> 0) & 0x1) +#define C_028808_FOG_ENABLE 0xFFFFFFFE +#define S_028808_MULTIWRITE_ENABLE(x) (((x) & 0x1) << 1) +#define G_028808_MULTIWRITE_ENABLE(x) (((x) >> 1) & 0x1) +#define C_028808_MULTIWRITE_ENABLE 0xFFFFFFFD +#define S_028808_DITHER_ENABLE(x) (((x) & 0x1) << 2) +#define 
G_028808_DITHER_ENABLE(x) (((x) >> 2) & 0x1) +#define C_028808_DITHER_ENABLE 0xFFFFFFFB +#define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3) +#define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1) +#define C_028808_DEGAMMA_ENABLE 0xFFFFFFF7 +#define S_028808_SPECIAL_OP(x) (((x) & 0x7) << 4) +#define G_028808_SPECIAL_OP(x) (((x) >> 4) & 0x7) +#define C_028808_SPECIAL_OP 0xFFFFFF8F +#define S_028808_PER_MRT_BLEND(x) (((x) & 0x1) << 7) +#define G_028808_PER_MRT_BLEND(x) (((x) >> 7) & 0x1) +#define C_028808_PER_MRT_BLEND 0xFFFFFF7F +#define S_028808_TARGET_BLEND_ENABLE(x) (((x) & 0xFF) << 8) +#define G_028808_TARGET_BLEND_ENABLE(x) (((x) >> 8) & 0xFF) +#define C_028808_TARGET_BLEND_ENABLE 0xFFFF00FF +#define S_028808_ROP3(x) (((x) & 0xFF) << 16) +#define G_028808_ROP3(x) (((x) >> 16) & 0xFF) +#define C_028808_ROP3 0xFF00FFFF #define R_028010_DB_DEPTH_INFO 0x028010 #define S_028010_FORMAT(x) (((x) & 0x7) << 0) #define G_028010_FORMAT(x) (((x) >> 0) & 0x7) -- cgit v1.2.3 From 12dac449fba5fe17087fd48ac959a5388875b2d3 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 4 Aug 2010 16:28:33 -0400 Subject: r600g: force flush on map as temporary fix to readpixel Should allow more piglit test to pass. Need to plugin proper flushing. 
Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_texture.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 3725bf8560..033c71f6ef 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -234,6 +234,8 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, struct r600_screen *rscreen = r600_screen(ctx->screen); char *map; + r600_flush(ctx, 0, NULL); + resource = (struct r600_resource *)transfer->resource; if (radeon_bo_map(rscreen->rw, resource->bo)) { return NULL; -- cgit v1.2.3 From ccbd9ae7cc5b3fcda23fe79573e70b4fc40f3939 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 5 Aug 2010 10:58:02 +0100 Subject: docs: clarify point sprite discussion Plagiarizes email explanation from Roland. --- src/gallium/docs/source/cso/rasterizer.rst | 39 ++++++++++++++++++------------ 1 file changed, 23 insertions(+), 16 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/docs/source/cso/rasterizer.rst b/src/gallium/docs/source/cso/rasterizer.rst index ad1612f93e..ee3419ccfc 100644 --- a/src/gallium/docs/source/cso/rasterizer.rst +++ b/src/gallium/docs/source/cso/rasterizer.rst @@ -126,11 +126,15 @@ sprite_coord_enable Specifies if a texture unit has its texture coordinates replaced or not. This is a packed bitfield containing the enable for all texcoords -- if all bits -are zero, point sprites are effectively disabled. If any bit is set, then -point_smooth and point_quad_rasterization are ignored; point smoothing is -disabled and points are always rasterized as quads. If enabled, the four -vertices of the resulting quad will be assigned texture coordinates, -according to sprite_coord_mode. +are zero, point sprites are effectively disabled. 
+ +If any bit is set, then point_smooth MUST be disabled (there are no +round sprites) and point_quad_rasterization MUST be true (sprites are +always rasterized as quads). Any mismatch between these states should +be considered a bug in the state-tracker. + +If enabled, the four vertices of the resulting quad will be assigned +texture coordinates, according to sprite_coord_mode. sprite_coord_mode ^^^^^^^^^^^^^^^^^ @@ -141,20 +145,23 @@ have coordinates (0,0,0,1). For PIPE_SPRITE_COORD_UPPER_LEFT, the upper-left vertex will have coordinates (0,0,0,1). This state is used by :ref:`Draw` to generate texcoords. -.. note:: - - When geometry shaders are available, a special geometry shader could be - used instead of this functionality, to convert incoming points into quads - with the proper texture coordinates. - point_quad_rasterization ^^^^^^^^^^^^^^^^^^^^^^^^ -Determines if points should be rasterized as quads or points. Certain APIs, -like Direct3D, always use quad rasterization for points, regardless of -whether point sprites are enabled or not. If this state is enabled, point -smoothing and antialiasing are disabled. If it is disabled, point sprite -coordinates are not generated. +Determines if points should be rasterized according to quad or point +rasterization rules. + +OpenGL actually has quite different rasterization rules for points and +point sprites - hence this indicates if points should be rasterized as +points or according to point sprite (which decomposes them into quads, +basically) rules. + +Additionally Direct3D will always use quad rasterization rules for +points, regardless of whether point sprites are enabled or not. + +If this state is enabled, point smoothing and antialiasing are +disabled. If it is disabled, point sprite coordinates are not +generated. .. note:: -- cgit v1.2.3 From 6eb2a7fbafd49e75b6cbbee57f23dda63eff73ef Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 7 Jul 2010 15:20:19 +0200 Subject: r300g: implement hyper-z support. 
(v4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This implements fast Z clear, Z compression, and HiZ support for r300->r500 GPUs. It also allows cbzb clears when fast Z clears are being used for the ZB. It requires a kernel with hyper-z support. Thanks to Marek Olšák , who started this off, and Alex Deucher at AMD for providing lots of hints. v2: squashed zmask ram size fix] squashed r300g/blitter: fix Z readback when compressed] v3: rebase around texture changes in master - .1 fix more bits v4: migrated to using u_mm in r300_texture to manage hiz/zmask rams consistently disabled HiZ when using OQ flush z-cache before turning hyper-z off update hyper-z state on dsa state change store depthclearvalue across cbzb clears and replace it afterwards. Signed-off-by: Dave Airlie --- src/gallium/auxiliary/util/u_blitter.c | 44 +++++ src/gallium/auxiliary/util/u_blitter.h | 2 + src/gallium/drivers/r300/r300_blit.c | 69 +++++++- src/gallium/drivers/r300/r300_chipset.c | 54 +++++- src/gallium/drivers/r300/r300_chipset.h | 14 +- src/gallium/drivers/r300/r300_context.c | 38 ++++- src/gallium/drivers/r300/r300_context.h | 31 +++- src/gallium/drivers/r300/r300_debug.c | 1 + src/gallium/drivers/r300/r300_emit.c | 142 ++++++++++++++- src/gallium/drivers/r300/r300_emit.h | 3 + src/gallium/drivers/r300/r300_flush.c | 3 +- src/gallium/drivers/r300/r300_hyperz.c | 237 +++++++++++++++++++++++++- src/gallium/drivers/r300/r300_hyperz.h | 5 + src/gallium/drivers/r300/r300_reg.h | 1 + src/gallium/drivers/r300/r300_render.c | 3 +- src/gallium/drivers/r300/r300_screen.h | 1 + src/gallium/drivers/r300/r300_state.c | 71 ++++++-- src/gallium/drivers/r300/r300_state_derived.c | 4 +- src/gallium/drivers/r300/r300_texture.c | 9 + src/gallium/drivers/r300/r300_winsys.h | 1 + src/gallium/winsys/radeon/drm/radeon_drm.c | 10 ++ src/gallium/winsys/radeon/drm/radeon_r300.c | 2 + src/gallium/winsys/radeon/drm/radeon_winsys.h | 3 + 23 files changed, 701 
insertions(+), 47 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 0d94aaae95..b5b86b7214 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -87,6 +87,7 @@ struct blitter_context_priv void *dsa_write_depth_keep_stencil; void *dsa_keep_depth_stencil; void *dsa_keep_depth_write_stencil; + void *dsa_flush_depth_stencil; void *velem_state; @@ -156,6 +157,10 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) ctx->dsa_keep_depth_stencil = pipe->create_depth_stencil_alpha_state(pipe, &dsa); + dsa.depth.writemask = 1; + ctx->dsa_flush_depth_stencil = + pipe->create_depth_stencil_alpha_state(pipe, &dsa); + dsa.depth.enabled = 1; dsa.depth.writemask = 1; dsa.depth.func = PIPE_FUNC_ALWAYS; @@ -940,3 +945,42 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, UTIL_BLITTER_ATTRIB_NONE, NULL); blitter_restore_CSOs(ctx); } + +/* Clear a region of a depth stencil surface. 
*/ +void util_blitter_flush_depth_stencil(struct blitter_context *blitter, + struct pipe_surface *dstsurf) +{ + struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; + struct pipe_context *pipe = ctx->base.pipe; + struct pipe_framebuffer_state fb_state; + + assert(dstsurf->texture); + if (!dstsurf->texture) + return; + + /* check the saved state */ + blitter_check_saved_CSOs(ctx); + assert(blitter->saved_fb_state.nr_cbufs != ~0); + + /* bind CSOs */ + pipe->bind_blend_state(pipe, ctx->blend_keep_color); + pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil); + + pipe->bind_rasterizer_state(pipe, ctx->rs_state); + pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0)); + pipe->bind_vs_state(pipe, ctx->vs_col); + pipe->bind_vertex_elements_state(pipe, ctx->velem_state); + + /* set a framebuffer state */ + fb_state.width = dstsurf->width; + fb_state.height = dstsurf->height; + fb_state.nr_cbufs = 0; + fb_state.cbufs[0] = 0; + fb_state.zsbuf = dstsurf; + pipe->set_framebuffer_state(pipe, &fb_state); + + blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height); + blitter->draw_rectangle(blitter, 0, 0, dstsurf->width, dstsurf->height, 0, + UTIL_BLITTER_ATTRIB_NONE, NULL); + blitter_restore_CSOs(ctx); +} diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index ba3f92eca8..f316587dea 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -200,6 +200,8 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, unsigned dstx, unsigned dsty, unsigned width, unsigned height); +void util_blitter_flush_depth_stencil(struct blitter_context *blitter, + struct pipe_surface *dstsurf); /* The functions below should be used to save currently bound constant state * objects inside a driver. 
The objects are automatically restored at the end * of the util_blitter_{clear, copy_region, fill_region} functions and then diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index d125196b6d..6f8d9abfc8 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -22,6 +22,7 @@ #include "r300_context.h" #include "r300_texture.h" +#include "r300_winsys.h" #include "util/u_format.h" #include "util/u_pack_color.h" @@ -81,7 +82,7 @@ static void r300_blitter_end(struct r300_context *r300) } static uint32_t r300_depth_clear_cb_value(enum pipe_format format, - const float* rgba) + const float* rgba) { union util_color uc; util_pack_color(rgba, format, &uc); @@ -98,6 +99,9 @@ static boolean r300_cbzb_clear_allowed(struct r300_context *r300, struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; + if (r300->z_fastfill) + clear_buffers &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL); + /* Only color clear allowed, and only one colorbuffer. */ if (clear_buffers != PIPE_CLEAR_COLOR || fb->nr_cbufs != 1) return FALSE; @@ -105,6 +109,23 @@ static boolean r300_cbzb_clear_allowed(struct r300_context *r300, return r300_surface(fb->cbufs[0])->cbzb_allowed; } +static uint32_t r300_depth_clear_value(enum pipe_format format, + double depth, unsigned stencil) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + return util_pack_z(format, depth); + + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return util_pack_z_stencil(format, depth, stencil); + + default: + assert(0); + return 0; + } +} + /* Clear currently bound buffers. 
*/ static void r300_clear(struct pipe_context* pipe, unsigned buffers, @@ -154,6 +175,22 @@ static void r300_clear(struct pipe_context* pipe, (struct r300_hyperz_state*)r300->hyperz_state.state; uint32_t width = fb->width; uint32_t height = fb->height; + boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); + uint32_t hyperz_dcv = 0; + + /* Enable fast Z clear. + * The zbuffer must be in micro-tiled mode, otherwise it locks up. */ + if ((buffers & (PIPE_CLEAR_DEPTH|PIPE_CLEAR_STENCIL)) && has_hyperz) { + + hyperz_dcv = hyperz->zb_depthclearvalue = + r300_depth_clear_value(fb->zsbuf->format, depth, stencil); + + r300_mark_fb_state_dirty(r300, R300_CHANGED_ZCLEAR_FLAG); + if (r300->z_compression || r300->z_fastfill) + r300->zmask_clear.dirty = TRUE; + if (r300->hiz_enable) + r300->hiz_clear.dirty = TRUE; + } /* Enable CBZB clear. */ if (r300_cbzb_clear_allowed(r300, buffers)) { @@ -181,6 +218,7 @@ static void r300_clear(struct pipe_context* pipe, /* Disable CBZB clear. */ if (r300->cbzb_clear) { r300->cbzb_clear = FALSE; + hyperz->zb_depthclearvalue = hyperz_dcv; r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); } @@ -221,6 +259,29 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe, r300_blitter_end(r300); } +/* Clear a region of a depth stencil surface. 
*/ +static void r300_flush_depth_stencil(struct pipe_context *pipe, + struct pipe_resource *dst, + struct pipe_subresource subdst) +{ + struct r300_context *r300 = r300_context(pipe); + struct pipe_surface *dstsurf; + struct r300_texture *tex = r300_texture(dst); + + /* only flush the zmask if we have one attached to this texture */ + if (!tex->zmask_mem[subdst.level]) + return; + + dstsurf = pipe->screen->get_tex_surface(pipe->screen, dst, + subdst.face, subdst.level, 0, + PIPE_BIND_DEPTH_STENCIL); + r300->z_decomp_rd = TRUE; + r300_blitter_begin(r300, R300_CLEAR_SURFACE); + util_blitter_flush_depth_stencil(r300->blitter, dstsurf); + r300_blitter_end(r300); + r300->z_decomp_rd = FALSE; +} + /* Copy a block of pixels from one surface to another using HW. */ static void r300_hw_copy_region(struct pipe_context* pipe, struct pipe_resource *dst, @@ -252,7 +313,7 @@ static void r300_resource_copy_region(struct pipe_context *pipe, { enum pipe_format old_format = dst->format; enum pipe_format new_format = old_format; - + boolean is_depth; if (!pipe->screen->is_format_supported(pipe->screen, old_format, src->target, src->nr_samples, @@ -279,6 +340,10 @@ static void r300_resource_copy_region(struct pipe_context *pipe, } } + is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; + if (is_depth) { + r300_flush_depth_stencil(pipe, src, subsrc); + } if (old_format != new_format) { dst->format = new_format; src->format = new_format; diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 21f3b9d261..2df25f9c8e 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -36,7 +36,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->num_vert_fpus = 2; caps->num_tex_units = 16; caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? 
FALSE : TRUE; - caps->has_hiz = TRUE; + caps->hiz_ram = 0; caps->is_r400 = FALSE; caps->is_r500 = FALSE; caps->high_second_pipe = FALSE; @@ -49,6 +49,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R300; caps->high_second_pipe = TRUE; caps->num_vert_fpus = 4; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x4145: @@ -61,6 +63,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R300; caps->high_second_pipe = TRUE; caps->num_vert_fpus = 4; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x4150: @@ -77,8 +81,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4E54: case 0x4E56: caps->family = CHIP_FAMILY_RV350; - caps->has_hiz = FALSE; caps->high_second_pipe = TRUE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x4148: @@ -91,12 +95,16 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R350; caps->high_second_pipe = TRUE; caps->num_vert_fpus = 4; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x4E4A: caps->family = CHIP_FAMILY_R360; caps->high_second_pipe = TRUE; caps->num_vert_fpus = 4; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x5460: @@ -108,8 +116,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5B64: case 0x5B65: caps->family = CHIP_FAMILY_RV370; - caps->has_hiz = FALSE; caps->high_second_pipe = TRUE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x3150: @@ -120,6 +128,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x3E54: caps->family = CHIP_FAMILY_RV380; caps->high_second_pipe = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x4A48: @@ -135,6 +145,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R420; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = 
R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x5548: @@ -149,6 +161,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R423; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x554C: @@ -161,6 +175,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R430; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x5D4C: @@ -172,6 +188,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R480; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x4B48: @@ -182,6 +200,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R481; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x5E4C: @@ -199,34 +219,36 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RV410; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x5954: case 0x5955: caps->family = CHIP_FAMILY_RS480; - caps->has_hiz = FALSE; caps->has_tcl = FALSE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x5974: case 0x5975: caps->family = CHIP_FAMILY_RS482; - caps->has_hiz = FALSE; caps->has_tcl = FALSE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x5A41: case 0x5A42: caps->family = CHIP_FAMILY_RS400; - caps->has_hiz = FALSE; caps->has_tcl = FALSE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x5A61: case 0x5A62: caps->family = CHIP_FAMILY_RC410; - caps->has_hiz = FALSE; caps->has_tcl = FALSE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x791E: @@ -234,6 +256,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) 
caps->family = CHIP_FAMILY_RS690; caps->has_tcl = FALSE; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x793F: @@ -242,6 +266,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS600; caps->has_tcl = FALSE; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x796C: @@ -251,6 +277,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS740; caps->has_tcl = FALSE; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7100: @@ -270,6 +298,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R520; caps->num_vert_fpus = 8; caps->is_r500 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7140: @@ -313,6 +343,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RV515; caps->num_vert_fpus = 2; caps->is_r500 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x71C0: @@ -334,6 +366,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RV530; caps->num_vert_fpus = 5; caps->is_r500 = TRUE; + caps->hiz_ram = RV530_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7240: @@ -354,12 +388,16 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R580; caps->num_vert_fpus = 8; caps->is_r500 = TRUE; + caps->hiz_ram = RV530_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7280: caps->family = CHIP_FAMILY_RV570; caps->num_vert_fpus = 8; caps->is_r500 = TRUE; + caps->hiz_ram = RV530_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7281: @@ -376,6 +414,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RV560; caps->num_vert_fpus = 8; caps->is_r500 = TRUE; + 
caps->hiz_ram = RV530_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; default: diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 65750f54e7..e7ca642b4f 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -25,6 +25,14 @@ #include "pipe/p_compiler.h" +/* these are sizes in dwords */ +#define R300_HIZ_LIMIT 10240 +#define RV530_HIZ_LIMIT 15360 + +/* rv3xx have only one pipe */ +#define PIPE_ZMASK_SIZE 4096 +#define RV3xx_ZMASK_SIZE 5120 + /* Structure containing all the possible information about a specific Radeon * in the R3xx, R4xx, and R5xx families. */ struct r300_capabilities { @@ -42,8 +50,10 @@ struct r300_capabilities { unsigned num_tex_units; /* Whether or not TCL is physically present */ boolean has_tcl; - /* Some chipsets do not have HiZ RAM. */ - boolean has_hiz; + /* Some chipsets do not have HiZ RAM - other have varying amounts . */ + int hiz_ram; + /* some chipsets have zmask ram per pipe some don't */ + int zmask_ram; /* Whether or not this is RV350 or newer, including all r400 and r500 * chipsets. 
The differences compared to the oldest r300 chips are: * - Blend LTE/GTE thresholds diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index df90359058..0668fbc151 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -30,6 +30,7 @@ #include "r300_cb.h" #include "r300_context.h" #include "r300_emit.h" +#include "r300_hyperz.h" #include "r300_screen.h" #include "r300_screen_buffer.h" #include "r300_winsys.h" @@ -114,6 +115,10 @@ static void r300_destroy_context(struct pipe_context* context) u_upload_destroy(r300->upload_vb); u_upload_destroy(r300->upload_ib); + /* setup hyper-z mm */ + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + r300_hyperz_destroy_mm(r300); + translate_cache_destroy(r300->tran.translate_cache); r300_release_referenced_objects(r300); @@ -166,6 +171,8 @@ static void r300_setup_atoms(struct r300_context* r300) boolean is_r500 = r300->screen->caps.is_r500; boolean has_tcl = r300->screen->caps.has_tcl; boolean drm_2_3_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); + boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); + boolean has_hiz_ram = r300->screen->caps.hiz_ram > 0; /* Create the actual atom list. * @@ -188,8 +195,9 @@ static void r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(gpu_flush, 9); R300_INIT_ATOM(aa_state, 4); R300_INIT_ATOM(fb_state, 0); + if (has_hyperz) + R300_INIT_ATOM(hyperz_state, is_rv350 ? 10 : 8); /* ZB (unpipelined), SC. */ - R300_INIT_ATOM(hyperz_state, 6); R300_INIT_ATOM(ztop_state, 2); /* ZB, FG. */ R300_INIT_ATOM(dsa_state, is_r500 ? 8 : 6); @@ -220,6 +228,13 @@ static void r300_setup_atoms(struct r300_context* r300) /* TX. */ R300_INIT_ATOM(texture_cache_inval, 2); R300_INIT_ATOM(textures_state, 0); + if (has_hyperz) { + /* HiZ Clear */ + if (has_hiz_ram) + R300_INIT_ATOM(hiz_clear, 0); + /* zmask clear */ + R300_INIT_ATOM(zmask_clear, 0); + } /* ZB (unpipelined), SU. 
*/ R300_INIT_ATOM(query_start, 4); @@ -236,7 +251,8 @@ static void r300_setup_atoms(struct r300_context* r300) r300->clip_state.state = CALLOC_STRUCT(r300_clip_state); r300->fb_state.state = CALLOC_STRUCT(pipe_framebuffer_state); r300->gpu_flush.state = CALLOC_STRUCT(pipe_framebuffer_state); - r300->hyperz_state.state = CALLOC_STRUCT(r300_hyperz_state); + if (has_hyperz) + r300->hyperz_state.state = CALLOC_STRUCT(r300_hyperz_state); r300->invariant_state.state = CALLOC_STRUCT(r300_invariant_state); r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block); r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); @@ -282,8 +298,7 @@ static void r300_init_states(struct pipe_context *pipe) (struct r300_vap_invariant_state*)r300->vap_invariant_state.state; struct r300_invariant_state *invariant = (struct r300_invariant_state*)r300->invariant_state.state; - struct r300_hyperz_state *hyperz = - (struct r300_hyperz_state*)r300->hyperz_state.state; + CB_LOCALS; pipe->set_blend_color(pipe, &bc); @@ -350,11 +365,20 @@ static void r300_init_states(struct pipe_context *pipe) } /* Initialize the hyperz state. 
*/ + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) { - BEGIN_CB(&hyperz->cb_begin, r300->hyperz_state.size); + struct r300_hyperz_state *hyperz = + (struct r300_hyperz_state*)r300->hyperz_state.state; + BEGIN_CB(&hyperz->cb_flush_begin, r300->hyperz_state.size); + OUT_CB_REG(R300_ZB_ZCACHE_CTLSTAT, + R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE); OUT_CB_REG(R300_ZB_BW_CNTL, 0); OUT_CB_REG(R300_ZB_DEPTHCLEARVALUE, 0); OUT_CB_REG(R300_SC_HYPERZ, R300_SC_HYPERZ_ADJ_2); + + if (r300->screen->caps.is_rv350) { + OUT_CB_REG(R300_GB_Z_PEQ_CONFIG, 0); + } END_CB; } } @@ -415,6 +439,10 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, rws->cs_set_flush(r300->cs, r300_flush_cb, r300); + /* setup hyper-z mm */ + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + r300_hyperz_init_mm(r300); + r300->upload_ib = u_upload_create(&r300->context, 32 * 1024, 16, PIPE_BIND_INDEX_BUFFER); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 7c77a46016..d86a5c8fc9 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -106,13 +106,19 @@ struct r300_dsa_state { }; struct r300_hyperz_state { + int current_func; /* -1 after a clear before first op */ + int flush; /* This is actually a command buffer with named dwords. */ + uint32_t cb_flush_begin; + uint32_t zb_zcache_ctlstat; /* R300_ZB_CACHE_CNTL */ uint32_t cb_begin; uint32_t zb_bw_cntl; /* R300_ZB_BW_CNTL */ uint32_t cb_reg1; uint32_t zb_depthclearvalue; /* R300_ZB_DEPTHCLEARVALUE */ uint32_t cb_reg2; uint32_t sc_hyperz; /* R300_SC_HYPERZ */ + uint32_t cb_reg3; + uint32_t gb_z_peq_config; /* R300_GB_Z_PEQ_CONFIG: 0x4028 */ }; struct r300_gpu_flush { @@ -321,6 +327,7 @@ struct r300_surface { /* Whether the CBZB clear is allowed on the surface. */ boolean cbzb_allowed; + }; struct r300_texture_desc { @@ -387,6 +394,10 @@ struct r300_texture { /* All bits should be filled in. 
*/ struct r300_texture_fb_state fb_state; + /* hyper-z memory allocs */ + struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS]; + struct mem_block *zmask_mem[R300_MAX_TEXTURE_LEVELS]; + /* This is the level tiling flags were last time set for. * It's used to prevent redundant tiling-flags changes from happening.*/ unsigned surface_level; @@ -512,6 +523,10 @@ struct r300_context { struct r300_atom texture_cache_inval; /* GPU flush. */ struct r300_atom gpu_flush; + /* HiZ clear */ + struct r300_atom hiz_clear; + /* zmask clear */ + struct r300_atom zmask_clear; /* Invariant state. This must be emitted to get the engine started. */ struct r300_atom invariant_state; @@ -549,8 +564,19 @@ struct r300_context { boolean two_sided_color; /* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */ boolean incompatible_vb_layout; - + /* Whether fast zclear is enabled. */ + boolean z_fastfill; +#define R300_Z_COMPRESS_44 1 +#define RV350_Z_COMPRESS_88 2 + int z_compression; + boolean hiz_enable; boolean cbzb_clear; + boolean z_decomp_rd; + + /* two mem block managers for hiz/zmask ram space */ + struct mem_block *hiz_mm; + struct mem_block *zmask_mm; + /* upload managers */ struct u_upload_mgr *upload_vb; struct u_upload_mgr *upload_ib; @@ -621,7 +647,8 @@ void r300_plug_in_stencil_ref_fallback(struct r300_context *r300); /* r300_state.c */ enum r300_fb_state_change { R300_CHANGED_FB_STATE = 0, - R300_CHANGED_CBZB_FLAG + R300_CHANGED_CBZB_FLAG, + R300_CHANGED_ZCLEAR_FLAG }; void r300_mark_fb_state_dirty(struct r300_context *r300, diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 053a64ea6d..c3e157e99a 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -44,6 +44,7 @@ static const struct debug_named_value debug_options[] = { { "notiling", DBG_NO_TILING, "Disable tiling (for benchmarking)" }, { "noimmd", DBG_NO_IMMD, "Disable immediate mode (for benchmarking)" }, { 
"stats", DBG_STATS, "Gather statistics" }, + { "hyperz", DBG_HYPERZ, "HyperZ (for debugging)" }, /* must be last */ DEBUG_NAMED_VALUE_END diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 36a26a7871..17e180a79a 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -25,6 +25,7 @@ #include "util/u_format.h" #include "util/u_math.h" +#include "util/u_mm.h" #include "util/u_simple_list.h" #include "r300_context.h" @@ -329,6 +330,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state; struct r300_surface* surf; unsigned i; + boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); CS_LOCALS(r300); BEGIN_CS(size); @@ -364,6 +366,10 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); OUT_CS_RELOC(surf->buffer, surf->cbzb_pitch, 0, surf->domain); + + DBG(r300, DBG_CBZB, + "CBZB clearing cbuf %08x %08x\n", surf->cbzb_format, + surf->cbzb_pitch); } /* Set up a zbuffer. */ else if (fb->zsbuf) { @@ -377,15 +383,32 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain); - /* HiZ RAM. */ - if (r300->screen->caps.has_hiz) { - OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); - OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); + if (has_hyperz) { + uint32_t surf_pitch; + struct r300_texture *tex; + int level = surf->base.level; + tex = r300_texture(surf->base.texture); + + surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK; + /* HiZ RAM. */ + if (r300->screen->caps.hiz_ram) { + if (tex->hiz_mem[level]) { + OUT_CS_REG(R300_ZB_HIZ_OFFSET, tex->hiz_mem[level]->ofs); + OUT_CS_REG(R300_ZB_HIZ_PITCH, surf_pitch); + } else { + OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); + OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); + } + } + /* Z Mask RAM. 
(compressed zbuffer) */ + if (tex->zmask_mem[level]) { + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch); + } else { + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0); + } } - - /* Z Mask RAM. (compressed zbuffer) */ - OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); - OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0); } END_CS; @@ -394,8 +417,12 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) void r300_emit_hyperz_state(struct r300_context *r300, unsigned size, void *state) { + struct r300_hyperz_state *z = state; CS_LOCALS(r300); - WRITE_CS_TABLE(state, size); + if (z->flush) + WRITE_CS_TABLE(&z->cb_flush_begin, size); + else + WRITE_CS_TABLE(&z->cb_begin, size - 2); } void r300_emit_hyperz_end(struct r300_context *r300) @@ -403,9 +430,11 @@ void r300_emit_hyperz_end(struct r300_context *r300) struct r300_hyperz_state z = *(struct r300_hyperz_state*)r300->hyperz_state.state; + z.flush = 1; z.zb_bw_cntl = 0; z.zb_depthclearvalue = 0; z.sc_hyperz = R300_SC_HYPERZ_ADJ_2; + z.gb_z_peq_config = 0; r300_emit_hyperz_state(r300, r300->hyperz_state.size, &z); } @@ -943,6 +972,101 @@ void r300_emit_viewport_state(struct r300_context* r300, END_CS; } +static void r300_emit_hiz_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) +{ + CS_LOCALS(r300); + BEGIN_CS(4); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2); + OUT_CS(start); + OUT_CS(count); + OUT_CS(val); + END_CS; +} + +static void r300_emit_zmask_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) +{ + CS_LOCALS(r300); + BEGIN_CS(4); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2); + OUT_CS(start); + OUT_CS(count); + OUT_CS(val); + END_CS; +} + +#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) + +void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + 
struct r300_hyperz_state *z = + (struct r300_hyperz_state*)r300->hyperz_state.state; + struct r300_screen* r300screen = r300->screen; + uint32_t stride, offset = 0, height, offset_shift; + struct r300_texture* tex; + int i; + + tex = r300_texture(fb->zsbuf->texture); + stride = tex->desc.stride_in_pixels[fb->zsbuf->level]; + + /* convert from pixels to 4x4 blocks */ + stride = ALIGN_DIVUP(stride, 4); + + stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); + /* there are 4 blocks per dwords */ + stride = ALIGN_DIVUP(stride, 4); + + height = ALIGN_DIVUP(fb->zsbuf->height, 4); + + offset_shift = 2; + offset_shift += (r300screen->caps.num_frag_pipes / 2); + + for (i = 0; i < height; i++) { + offset = i * stride; + offset <<= offset_shift; + r300_emit_hiz_line_clear(r300, offset, stride, 0xffffffff); + } + z->current_func = -1; +} + +void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_screen* r300screen = r300->screen; + uint32_t stride, offset = 0; + struct r300_texture* tex; + uint32_t i, height; + int mult, offset_shift; + + tex = r300_texture(fb->zsbuf->texture); + stride = tex->desc.stride_in_pixels[fb->zsbuf->level]; + + if (r300->z_compression == RV350_Z_COMPRESS_88) + mult = 8; + else + mult = 4; + + height = ALIGN_DIVUP(fb->zsbuf->height, mult); + + offset_shift = 4; + offset_shift += (r300screen->caps.num_frag_pipes / 2); + stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); + + /* okay have width in pixels - divide by block width */ + stride = ALIGN_DIVUP(stride, mult); + /* have width in blocks - divide by number of fragment pipes screen width */ + /* 16 blocks per dword */ + stride = ALIGN_DIVUP(stride, 16); + + for (i = 0; i < height; i++) { + offset = i * stride; + offset <<= offset_shift; + r300_emit_zmask_line_clear(r300, offset, stride, 0x0);//0xffffffff); + } +} + void 
r300_emit_ztop_state(struct r300_context* r300, unsigned size, void* state) { diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 5d05039669..2f2c2f2dcb 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -112,6 +112,9 @@ void r300_emit_texture_cache_inval(struct r300_context* r300, unsigned size, voi void r300_emit_invariant_state(struct r300_context *r300, unsigned size, void *state); +void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state); +void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state); + unsigned r300_get_num_dirty_dwords(struct r300_context *r300); /* Emit all dirty state. */ diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index fe182b6615..7fed9b5d07 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -44,7 +44,8 @@ static void r300_flush(struct pipe_context* pipe, u_upload_flush(r300->upload_ib); if (r300->dirty_hw) { - r300_emit_hyperz_end(r300); + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + r300_emit_hyperz_end(r300); r300_emit_query_end(r300); r300->flush_counter++; diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index e952895601..e719342a46 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -21,25 +21,158 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "util/u_format.h" +#include "util/u_mm.h" #include "r300_context.h" #include "r300_hyperz.h" #include "r300_reg.h" #include "r300_fs.h" +#include "r300_emit.h" +#include "r300_texture.h" +/* + HiZ rules - taken from various docs + 1. HiZ only works on depth values + 2. Cannot HiZ if stencil fail or zfail is !KEEP + 3. on R300/400, HiZ is disabled if depth test is EQUAL + 4. 
comparison changes without clears usually mean disabling HiZ +*/ /*****************************************************************************/ /* The HyperZ setup */ /*****************************************************************************/ +static bool r300_get_sc_hz_max(struct r300_context *r300) +{ + struct r300_dsa_state *dsa_state = r300->dsa_state.state; + int func = dsa_state->z_stencil_control & 0x7; + int ret = R300_SC_HYPERZ_MIN; + + if (func >= 4 && func <= 7) + ret = R300_SC_HYPERZ_MAX; + return ret; +} + +static bool r300_zfunc_same_direction(int func1, int func2) +{ + /* func1 is less/lessthan */ + if (func1 == 1 || func1 == 2) + if (func2 == 3 || func2 == 4 || func2 == 5) + return FALSE; + + if (func2 == 1 || func2 == 2) + if (func1 == 4 || func1 == 5) + return FALSE; + return TRUE; +} + +static int r300_get_hiz_min(struct r300_context *r300) +{ + struct r300_dsa_state *dsa_state = r300->dsa_state.state; + int func = dsa_state->z_stencil_control & 0x7; + int ret = R300_HIZ_MIN; + + if (func == 1 || func == 2) + ret = R300_HIZ_MAX; + return ret; +} + +static boolean r300_dsa_stencil_op_not_keep(struct pipe_stencil_state *s) +{ + if (s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP || + s->zfail_op != PIPE_STENCIL_OP_KEEP)) + return TRUE; + return FALSE; +} + +static boolean r300_can_hiz(struct r300_context *r300) +{ + struct r300_dsa_state *dsa_state = r300->dsa_state.state; + struct pipe_depth_stencil_alpha_state *dsa = &dsa_state->dsa; + struct r300_screen* r300screen = r300->screen; + struct r300_hyperz_state *z = r300->hyperz_state.state; + + /* shader writes depth - no HiZ */ + if (r300_fragment_shader_writes_depth(r300_fs(r300))) /* (5) */ + return FALSE; + + if (r300->query_current) + return FALSE; + /* if stencil fail/zfail op is not KEEP */ + if (r300_dsa_stencil_op_not_keep(&dsa->stencil[0]) || + r300_dsa_stencil_op_not_keep(&dsa->stencil[1])) + return FALSE; + + if (dsa->depth.enabled) { + /* if depth func is EQUAL pre-r500 */ + if 
(dsa->depth.func == PIPE_FUNC_EQUAL && !r300screen->caps.is_r500) + return FALSE; + /* if depth func is NOTEQUAL */ + if (dsa->depth.func == PIPE_FUNC_NOTEQUAL) + return FALSE; + } + /* depth comparison function - if just cleared save and return okay */ + if (z->current_func == -1) { + int func = dsa_state->z_stencil_control & 0x7; + if (func != 0 && func != 7) + z->current_func = dsa_state->z_stencil_control & 0x7; + } else { + /* simple don't change */ + if (!r300_zfunc_same_direction(z->current_func, (dsa_state->z_stencil_control & 0x7))) { + DBG(r300, DBG_HYPERZ, "z func changed direction - disabling hyper-z %d -> %d\n", z->current_func, dsa_state->z_stencil_control); + return FALSE; + } + } + return TRUE; +} + static void r300_update_hyperz(struct r300_context* r300) { struct r300_hyperz_state *z = (struct r300_hyperz_state*)r300->hyperz_state.state; + z->gb_z_peq_config = 0; z->zb_bw_cntl = 0; z->sc_hyperz = R300_SC_HYPERZ_ADJ_2; + z->flush = 0; - if (r300->cbzb_clear) + if (r300->cbzb_clear) { z->zb_bw_cntl |= R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY; + return; + } + + /* Zbuffer compression. */ + if (r300->z_compression) { + z->zb_bw_cntl |= R300_RD_COMP_ENABLE; + if (r300->z_decomp_rd == false) + z->zb_bw_cntl |= R300_WR_COMP_ENABLE; + /* RV350 and up optimizations. */ + if (r300->z_compression == RV350_Z_COMPRESS_88) + z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; + } + + /* Z fastfill. */ + if (r300->z_fastfill) { + z->zb_bw_cntl |= R300_FAST_FILL_ENABLE; /* | R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE;*/ + } + + if (r300->hiz_enable) { + bool can_hiz = r300_can_hiz(r300); + if (can_hiz) { + z->zb_bw_cntl |= R300_HIZ_ENABLE; + z->sc_hyperz |= R300_SC_HYPERZ_ENABLE; + z->sc_hyperz |= r300_get_sc_hz_max(r300); + z->zb_bw_cntl |= r300_get_hiz_min(r300); + } + } + + if (r300->screen->caps.is_r500) { + /* XXX Are these bits really available on RV350? 
*/ + z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3; + z->zb_bw_cntl |= + R500_HIZ_EQUAL_REJECT_ENABLE | + R500_PEQ_PACKING_ENABLE | + R500_COVERED_PTR_MASKING_ENABLE; + } } /*****************************************************************************/ @@ -126,15 +259,115 @@ static void r300_update_ztop(struct r300_context* r300) } else { ztop_state->z_buffer_top = R300_ZTOP_ENABLE; } - if (ztop_state->z_buffer_top != old_ztop) r300->ztop_state.dirty = TRUE; } +#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) + +static void r300_update_hiz_clear(struct r300_context *r300) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + uint32_t height; + + height = ALIGN_DIVUP(fb->zsbuf->height, 4); + r300->hiz_clear.size = height * 4; +} + +static void r300_update_zmask_clear(struct r300_context *r300) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + uint32_t height; + int mult; + + if (r300->z_compression == RV350_Z_COMPRESS_88) + mult = 8; + else + mult = 4; + + height = ALIGN_DIVUP(fb->zsbuf->height, mult); + + r300->zmask_clear.size = height * 4; +} + void r300_update_hyperz_state(struct r300_context* r300) { r300_update_ztop(r300); if (r300->hyperz_state.dirty) { r300_update_hyperz(r300); } + + if (r300->hiz_clear.dirty) { + r300_update_hiz_clear(r300); + } + if (r300->zmask_clear.dirty) { + r300_update_zmask_clear(r300); + } +} + +void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf) +{ + struct r300_texture *tex; + uint32_t zsize, ndw; + int level = surf->base.level; + + tex = r300_texture(surf->base.texture); + + if (tex->hiz_mem[level]) + return; + + zsize = tex->desc.layer_size_in_bytes[level]; + zsize /= util_format_get_blocksize(tex->desc.b.b.format); + ndw = ALIGN_DIVUP(zsize, 64); + + tex->hiz_mem[level] = u_mmAllocMem(r300->hiz_mm, ndw, 0, 0); + return; +} + +void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int 
compress) +{ + int bsize = 256; + uint32_t zsize, ndw; + int level = surf->base.level; + struct r300_texture *tex; + + tex = r300_texture(surf->base.texture); + + if (tex->zmask_mem[level]) + return; + + zsize = tex->desc.layer_size_in_bytes[level]; + zsize /= util_format_get_blocksize(tex->desc.b.b.format); + + /* each zmask dword represents 16 4x4 blocks - which is 256 pixels + or 16 8x8 depending on the gb peq flag = 1024 pixels */ + if (compress == RV350_Z_COMPRESS_88) + bsize = 1024; + + ndw = ALIGN_DIVUP(zsize, bsize); + tex->zmask_mem[level] = u_mmAllocMem(r300->zmask_mm, ndw, 0, 0); + return; +} + +void r300_hyperz_init_mm(struct r300_context *r300) +{ + struct r300_screen* r300screen = r300->screen; + int frag_pipes = r300screen->caps.num_frag_pipes; + + if (r300screen->caps.hiz_ram) + r300->hiz_mm = u_mmInit(0, r300screen->caps.hiz_ram * frag_pipes); + + r300->zmask_mm = u_mmInit(0, r300screen->caps.zmask_ram * frag_pipes); +} + +void r300_hyperz_destroy_mm(struct r300_context *r300) +{ + struct r300_screen* r300screen = r300->screen; + + if (r300screen->caps.hiz_ram) + u_mmDestroy(r300->hiz_mm); + + u_mmDestroy(r300->zmask_mm); } diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h index 3df5053b89..09e1ff6625 100644 --- a/src/gallium/drivers/r300/r300_hyperz.h +++ b/src/gallium/drivers/r300/r300_hyperz.h @@ -27,4 +27,9 @@ struct r300_context; void r300_update_hyperz_state(struct r300_context* r300); +void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf); +void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress); + +void r300_hyperz_init_mm(struct r300_context *r300); +void r300_hyperz_destroy_mm(struct r300_context *r300); #endif diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 2acc1a903e..99a9d65055 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -3436,6 
+3436,7 @@ enum { # define R300_VBPNTR_SIZE1(x) (((x) >> 2) << 16) # define R300_VBPNTR_STRIDE1(x) (((x) >> 2) << 24) +#define R300_PACKET3_3D_CLEAR_ZMASK 0x00003200 #define R300_PACKET3_INDX_BUFFER 0x00003300 # define R300_INDX_BUFFER_DST_SHIFT 0 # define R300_INDX_BUFFER_SKIP_SHIFT 16 diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 7c4294bc9f..910f5f7113 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -223,7 +223,8 @@ static void r300_prepare_for_rendering(struct r300_context *r300, /* Emitted in flush. */ end_dwords += 26; /* emit_query_end */ - end_dwords += r300->hyperz_state.size; /* emit_hyperz_end */ + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + end_dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */ cs_dwords += end_dwords; diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 18745b83a0..13a3320b99 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -91,6 +91,7 @@ r300_winsys_screen(struct pipe_screen *screen) { #define DBG_FB (1 << 9) #define DBG_RS_BLOCK (1 << 10) #define DBG_CBZB (1 << 11) +#define DBG_HYPERZ (1 << 12) /* Features. */ #define DBG_ANISOHQ (1 << 16) #define DBG_NO_TILING (1 << 17) diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index a3383c3878..374aa254f0 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -25,6 +25,7 @@ #include "util/u_blitter.h" #include "util/u_math.h" +#include "util/u_mm.h" #include "util/u_memory.h" #include "util/u_pack_color.h" @@ -43,6 +44,7 @@ #include "r300_texture.h" #include "r300_vs.h" #include "r300_winsys.h" +#include "r300_hyperz.h" /* r300_state: Functions used to intialize state context by translating * Gallium state objects into semi-native r300 state objects. 
*/ @@ -472,14 +474,14 @@ static void* dsa->dsa = *state; - /* Depth test setup. */ + /* Depth test setup. - separate write mask depth for decomp flush */ + if (state->depth.writemask) { + dsa->z_buffer_control |= R300_Z_WRITE_ENABLE; + } + if (state->depth.enabled) { dsa->z_buffer_control |= R300_Z_ENABLE; - if (state->depth.writemask) { - dsa->z_buffer_control |= R300_Z_WRITE_ENABLE; - } - dsa->z_stencil_control |= (r300_translate_depth_stencil_function(state->depth.func) << R300_Z_FUNC_SHIFT); @@ -592,6 +594,7 @@ static void r300_bind_dsa_state(struct pipe_context* pipe, UPDATE_STATE(state, r300->dsa_state); + r300->hyperz_state.dirty = TRUE; /* Will be updated before the emission. */ r300_dsa_inject_stencilref(r300); } @@ -685,7 +688,8 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, /* What is marked as dirty depends on the enum r300_fb_state_change. */ r300->gpu_flush.dirty = TRUE; r300->fb_state.dirty = TRUE; - r300->hyperz_state.dirty = TRUE; + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + r300->hyperz_state.dirty = TRUE; if (change == R300_CHANGED_FB_STATE) { r300->aa_state.dirty = TRUE; @@ -698,7 +702,7 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, if (r300->cbzb_clear) r300->fb_state.size += 10; else if (state->zsbuf) - r300->fb_state.size += r300->screen->caps.has_hiz ? 18 : 14; + r300->fb_state.size += r300->screen->caps.hiz_ram ? 18 : 14; /* The size of the rest of atoms stays the same. 
*/ } @@ -710,8 +714,10 @@ static void struct r300_context* r300 = r300_context(pipe); struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; struct pipe_framebuffer_state *old_state = r300->fb_state.state; + boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); unsigned max_width, max_height, i; uint32_t zbuffer_bpp = 0; + int blocksize; if (r300->screen->caps.is_r500) { max_width = max_height = 4096; @@ -743,17 +749,52 @@ static void r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE); - /* Polygon offset depends on the zbuffer bit depth. */ + r300->hiz_enable = false; + r300->z_fastfill = false; + r300->z_compression = false; + if (state->zsbuf) { - switch (util_format_get_blocksize(state->zsbuf->texture->format)) { - case 2: - zbuffer_bpp = 16; - break; - case 4: - zbuffer_bpp = 24; - break; + blocksize = util_format_get_blocksize(state->zsbuf->texture->format); + switch (blocksize) { + case 2: + zbuffer_bpp = 16; + break; + case 4: + zbuffer_bpp = 24; + break; } + if (has_hyperz) { + struct r300_surface *zs_surf = r300_surface(state->zsbuf); + struct r300_texture *tex; + int compress = r300->screen->caps.is_rv350 ? RV350_Z_COMPRESS_88 : R300_Z_COMPRESS_44; + int level = zs_surf->base.level; + + tex = r300_texture(zs_surf->base.texture); + + /* work out whether we can support hiz on this buffer */ + r300_hiz_alloc_block(r300, zs_surf); + + /* work out whether we can support zmask features on this buffer */ + r300_zmask_alloc_block(r300, zs_surf, compress); + + if (tex->hiz_mem[level]) { + r300->hiz_enable = 1; + } + if (tex->zmask_mem[level]) { + r300->z_fastfill = 1; + /* compression causes hangs on 16-bit */ + if (zbuffer_bpp == 24) + r300->z_compression = compress; + } + DBG(r300, DBG_HYPERZ, + "hyper-z features: hiz: %d @ %08x z-compression: %d z-fastfill: %d @ %08x\n", r300->hiz_enable, + tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef, + r300->z_compression, r300->z_fastfill, + tex->zmask_mem[level] ? 
tex->zmask_mem[level]->ofs : 0xdeadbeef); + } + + /* Polygon offset depends on the zbuffer bit depth. */ if (r300->zbuffer_bpp != zbuffer_bpp) { r300->zbuffer_bpp = zbuffer_bpp; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 66f64f0f6a..f3dad4c292 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -35,6 +35,7 @@ #include "r300_state_inlines.h" #include "r300_texture.h" #include "r300_vs.h" +#include "r300_winsys.h" /* r300_state_derived: Various bits of state which are dependent upon * currently bound CSO data. */ @@ -693,5 +694,6 @@ void r300_update_derived_state(struct r300_context* r300) } } - r300_update_hyperz_state(r300); + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + r300_update_hyperz_state(r300); } diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index fcdca5605e..da8eadd3b5 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -35,6 +35,7 @@ #include "util/u_format_s3tc.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_mm.h" #include "pipe/p_screen.h" @@ -645,8 +646,16 @@ static void r300_texture_destroy(struct pipe_screen *screen, { struct r300_texture* tex = (struct r300_texture*)texture; struct r300_winsys_screen *rws = (struct r300_winsys_screen *)texture->screen->winsys; + int i; rws->buffer_reference(rws, &tex->buffer, NULL); + for (i = 0; i < R300_MAX_TEXTURE_LEVELS; i++) { + if (tex->hiz_mem[i]) + u_mmFreeMem(tex->hiz_mem[i]); + if (tex->zmask_mem[i]) + u_mmFreeMem(tex->zmask_mem[i]); + } + FREE(tex); } diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index ff11546a64..e7a1ede4fb 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -49,6 +49,7 @@ enum r300_value_id { R300_VID_Z_PIPES, 
R300_VID_SQUARE_TILING_SUPPORT, R300_VID_DRM_2_3_0, + R300_CAN_HYPERZ, }; enum r300_reference_domain { /* bitfield */ diff --git a/src/gallium/winsys/radeon/drm/radeon_drm.c b/src/gallium/winsys/radeon/drm/radeon_drm.c index e9a276362f..e7057ca593 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm.c @@ -130,6 +130,16 @@ static void do_ioctls(int fd, struct radeon_libdrm_winsys* winsys) } winsys->z_pipes = target; + winsys->hyperz = FALSE; +#ifndef RADEON_INFO_WANT_HYPERZ +#define RADEON_INFO_WANT_HYPERZ 7 +#endif + info.request = RADEON_INFO_WANT_HYPERZ; + retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); + if (!retval && target == 1) { + winsys->hyperz = TRUE; + } + retval = drmCommandWriteRead(fd, DRM_RADEON_GEM_INFO, &gem_info, sizeof(gem_info)); if (retval) { diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c index 5544504067..955ae4c045 100644 --- a/src/gallium/winsys/radeon/drm/radeon_r300.c +++ b/src/gallium/winsys/radeon/drm/radeon_r300.c @@ -211,6 +211,8 @@ static uint32_t radeon_get_value(struct r300_winsys_screen *rws, return ws->squaretiling; case R300_VID_DRM_2_3_0: return ws->drm_2_3_0; + case R300_CAN_HYPERZ: + return ws->hyperz; } return 0; } diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 533b7b2e2d..52db0d62d2 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -65,6 +65,9 @@ struct radeon_libdrm_winsys { */ boolean drm_2_3_0; + /* hyperz user */ + boolean hyperz; + /* DRM FD */ int fd; -- cgit v1.2.3 From 41f9e60bb30765e9d272bb6d61bb8fddc3623f8b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 5 Aug 2010 20:30:31 +1000 Subject: r300g: disable hiz on rv530 for now. On my rv530 at least HiZ is causing rendering issues in gears. 
--- src/gallium/drivers/r300/r300_chipset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 2df25f9c8e..48c2409211 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -366,7 +366,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RV530; caps->num_vert_fpus = 5; caps->is_r500 = TRUE; - caps->hiz_ram = RV530_HIZ_LIMIT; + /*caps->hiz_ram = RV530_HIZ_LIMIT;*/ caps->zmask_ram = PIPE_ZMASK_SIZE; break; -- cgit v1.2.3 From b5f104471175830de6474c176ebc567714edcdd6 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 5 Aug 2010 20:58:51 +1000 Subject: r300g: always emit hyperz state atom. --- src/gallium/drivers/r300/r300_context.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 0668fbc151..4658ab0ff5 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -195,8 +195,7 @@ static void r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(gpu_flush, 9); R300_INIT_ATOM(aa_state, 4); R300_INIT_ATOM(fb_state, 0); - if (has_hyperz) - R300_INIT_ATOM(hyperz_state, is_rv350 ? 10 : 8); + R300_INIT_ATOM(hyperz_state, is_rv350 ? 10 : 8); /* ZB (unpipelined), SC. */ R300_INIT_ATOM(ztop_state, 2); /* ZB, FG. 
*/ @@ -251,8 +250,7 @@ static void r300_setup_atoms(struct r300_context* r300) r300->clip_state.state = CALLOC_STRUCT(r300_clip_state); r300->fb_state.state = CALLOC_STRUCT(pipe_framebuffer_state); r300->gpu_flush.state = CALLOC_STRUCT(pipe_framebuffer_state); - if (has_hyperz) - r300->hyperz_state.state = CALLOC_STRUCT(r300_hyperz_state); + r300->hyperz_state.state = CALLOC_STRUCT(r300_hyperz_state); r300->invariant_state.state = CALLOC_STRUCT(r300_invariant_state); r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block); r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); @@ -365,7 +363,6 @@ static void r300_init_states(struct pipe_context *pipe) } /* Initialize the hyperz state. */ - if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) { struct r300_hyperz_state *hyperz = (struct r300_hyperz_state*)r300->hyperz_state.state; -- cgit v1.2.3 From fc94fd9bfa36d003ec5a1b7041fdb74c93ec3667 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 4 Aug 2010 11:34:54 +0200 Subject: r300g: debug_print on startup whether we can use hyper-z --- src/gallium/winsys/radeon/drm/radeon_drm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/winsys/radeon/drm/radeon_drm.c b/src/gallium/winsys/radeon/drm/radeon_drm.c index e7057ca593..3604827700 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm.c @@ -152,12 +152,14 @@ static void do_ioctls(int fd, struct radeon_libdrm_winsys* winsys) debug_printf("radeon: Successfully grabbed chipset info from kernel!\n" "radeon: DRM version: %d.%d.%d ID: 0x%04x GB: %d Z: %d\n" - "radeon: GART size: %d MB VRAM size: %d MB\n", + "radeon: GART size: %d MB VRAM size: %d MB\n" + "radeon: HyperZ: %s\n", version->version_major, version->version_minor, version->version_patchlevel, winsys->pci_id, winsys->gb_pipes, winsys->z_pipes, winsys->gart_size / 1024 / 1024, - winsys->vram_size / 1024 / 1024); + winsys->vram_size / 1024 / 
1024, + winsys->hyperz ? "YES" : "NO"); drmFreeVersion(version); } -- cgit v1.2.3 From a4be3b6b312496305de473b5af219a3afd84f6c6 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 5 Aug 2010 19:04:01 +0200 Subject: r300g: fix fb_state atom size --- src/gallium/drivers/r300/r300_state.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 374aa254f0..1e6b81d798 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -684,6 +684,7 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, enum r300_fb_state_change change) { struct pipe_framebuffer_state *state = r300->fb_state.state; + boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); /* What is marked as dirty depends on the enum r300_fb_state_change. */ r300->gpu_flush.dirty = TRUE; @@ -701,8 +702,11 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, if (r300->cbzb_clear) r300->fb_state.size += 10; - else if (state->zsbuf) - r300->fb_state.size += r300->screen->caps.hiz_ram ? 18 : 14; + else if (state->zsbuf) { + r300->fb_state.size += 10; + if (has_hyperz) + r300->fb_state.size += r300->screen->caps.hiz_ram ? 8 : 4; + } /* The size of the rest of atoms stays the same. 
*/ } -- cgit v1.2.3 From 0633c2e68312c292607d6af22d94d67d2d141600 Mon Sep 17 00:00:00 2001 From: Joakim Sindholt Date: Thu, 5 Aug 2010 20:42:04 +0200 Subject: targets/egl: fix build --- src/gallium/targets/egl/st_GL.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium') diff --git a/src/gallium/targets/egl/st_GL.c b/src/gallium/targets/egl/st_GL.c index 676300b0cc..17b7bf9d48 100644 --- a/src/gallium/targets/egl/st_GL.c +++ b/src/gallium/targets/egl/st_GL.c @@ -1,4 +1,5 @@ #include "state_tracker/st_gl_api.h" +#include "state_tracker/st_api.h" #if FEATURE_GL PUBLIC struct st_api * -- cgit v1.2.3 From 9c949d4a4dd43b7889e13bdf683bcf211f049ced Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 4 Aug 2010 17:37:59 -0400 Subject: r600g: don't use dynamic state allocation for states Simplify state handling by avoiding state allocation. Next step is to allocate once for all context packet buffer and then avoid rebuilding pm4 packet each time (through use of combined crc); this would also avoid a number of memcpy calls.
Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_context.c | 141 +++++++-------- src/gallium/drivers/r600/r600_context.h | 29 +-- src/gallium/drivers/r600/r600_draw.c | 115 ++++++------ src/gallium/drivers/r600/r600_shader.c | 34 ++-- src/gallium/drivers/r600/r600_state.c | 281 ++++++++++++----------------- src/gallium/drivers/r600/radeon.h | 52 ++++-- src/gallium/targets/dri-r600/Makefile | 4 +- src/gallium/winsys/r600/drm/radeon.c | 11 -- src/gallium/winsys/r600/drm/radeon_ctx.c | 160 +++++----------- src/gallium/winsys/r600/drm/radeon_draw.c | 92 +--------- src/gallium/winsys/r600/drm/radeon_priv.h | 29 --- src/gallium/winsys/r600/drm/radeon_state.c | 70 +------ 12 files changed, 361 insertions(+), 657 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index ae1780a1d4..f7732d8952 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -48,18 +48,14 @@ void r600_flush(struct pipe_context *ctx, unsigned flags, struct r600_screen *rscreen = rctx->screen; static int dc = 0; - if (radeon_ctx_pm4(rctx->ctx)) + if (radeon_ctx_pm4(&rctx->ctx)) return; /* FIXME dumping should be removed once shader support instructions * without throwing bad code */ if (!dc) - radeon_ctx_dump_bof(rctx->ctx, "gallium.bof"); -#if 1 - radeon_ctx_submit(rctx->ctx); -#endif - rctx->ctx = radeon_ctx_decref(rctx->ctx); - rctx->ctx = radeon_ctx(rscreen->rw); + radeon_ctx_dump_bof(&rctx->ctx, "gallium.bof"); + radeon_ctx_submit(&rctx->ctx); dc++; } @@ -220,9 +216,8 @@ static void r600_init_config(struct r600_context *rctx) printf("num_gs_stack_entries : %d\n", num_gs_stack_entries); printf("num_es_stack_entries : %d\n", num_es_stack_entries); - rctx->hw_states.config = radeon_state(rctx->rw, R600_CONFIG_TYPE, R600_CONFIG); - - rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] = 0x00000000; + radeon_state_init(&rctx->config, rctx->rw, 
R600_CONFIG_TYPE, R600_CONFIG); + rctx->config.states[R600_CONFIG__SQ_CONFIG] = 0x00000000; switch (family) { case CHIP_RV610: case CHIP_RV620: @@ -231,75 +226,75 @@ static void r600_init_config(struct r600_context *rctx) case CHIP_RV710: break; default: - rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1); + rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1); break; } - rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_DX9_CONSTS(1); - rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ALU_INST_PREFER_VECTOR(1); - rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio); - rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio); - rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio); - rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio); + rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_DX9_CONSTS(1); + rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ALU_INST_PREFER_VECTOR(1); + rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio); + rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio); + rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio); + rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio); - rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0; - rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs); - rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs); - rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); + rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0; + rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs); + 
rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs); + rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0; - rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs); - rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_es_gprs); + rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0; + rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs); + rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_es_gprs); - rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0; - rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_PS_THREADS(num_ps_threads); - rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_VS_THREADS(num_vs_threads); - rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_GS_THREADS(num_gs_threads); - rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_ES_THREADS(num_es_threads); + rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0; + rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_PS_THREADS(num_ps_threads); + rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_VS_THREADS(num_vs_threads); + rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_GS_THREADS(num_gs_threads); + rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_ES_THREADS(num_es_threads); - rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0; - rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); - 
rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); + rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0; + rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); + rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); - rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0; - rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); - rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); + rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0; + rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); + rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); - rctx->hw_states.config->states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000; - rctx->hw_states.config->states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002; - rctx->hw_states.config->states[R600_CONFIG__VC_ENHANCE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__DB_DEBUG] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__DB_WATERMARKS] = 0x00420204; - rctx->hw_states.config->states[R600_CONFIG__SX_MISC] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001; - rctx->hw_states.config->states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003; - rctx->hw_states.config->states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 
0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_GS_MODE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000; - rctx->hw_states.config->states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001; - rctx->hw_states.config->states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000; - radeon_state_pm4(rctx->hw_states.config); + rctx->config.states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000; + 
rctx->config.states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002; + rctx->config.states[R600_CONFIG__VC_ENHANCE] = 0x00000000; + rctx->config.states[R600_CONFIG__DB_DEBUG] = 0x00000000; + rctx->config.states[R600_CONFIG__DB_WATERMARKS] = 0x00420204; + rctx->config.states[R600_CONFIG__SX_MISC] = 0x00000000; + rctx->config.states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001; + rctx->config.states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003; + rctx->config.states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_GS_MODE] = 
0x00000000; + rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000; + rctx->config.states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001; + rctx->config.states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000; + radeon_state_pm4(&rctx->config); } struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) @@ -333,7 +328,7 @@ struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) r600_init_config(rctx); - rctx->ctx = radeon_ctx(rscreen->rw); - rctx->draw = radeon_draw(rscreen->rw); + radeon_ctx_init(&rctx->ctx, rscreen->rw); + radeon_draw_init(&rctx->draw, rscreen->rw); return &rctx->context; } diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index 8d102b6850..78da88fef5 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -76,7 +76,7 @@ struct r600_context_state { union pipe_states state; unsigned refcount; unsigned type; - struct radeon_state *rstate; + struct radeon_state rstate; struct r600_shader shader; struct radeon_bo *bo; }; @@ -89,28 +89,28 @@ struct r600_vertex_element }; struct r600_context_hw_states { - struct radeon_state *rasterizer; - struct radeon_state *scissor; - struct radeon_state *dsa; - struct radeon_state *blend; - struct radeon_state *viewport; - struct radeon_state *cb0; - struct radeon_state *config; - struct radeon_state *cb_cntl; - struct radeon_state *db; + struct radeon_state rasterizer; + struct radeon_state scissor; + struct radeon_state dsa; + struct radeon_state blend; + struct radeon_state viewport; + struct radeon_state cb0; + struct radeon_state config; + struct radeon_state cb_cntl; + struct radeon_state db; unsigned ps_nresource; unsigned ps_nsampler; - struct radeon_state *ps_resource[160]; - struct radeon_state *ps_sampler[16]; + struct radeon_state 
ps_resource[160]; + struct radeon_state ps_sampler[16]; }; struct r600_context { struct pipe_context context; struct r600_screen *screen; struct radeon *rw; - struct radeon_ctx *ctx; + struct radeon_ctx ctx; struct blitter_context *blitter; - struct radeon_draw *draw; + struct radeon_draw draw; /* hw states */ struct r600_context_hw_states hw_states; /* pipe states */ @@ -120,6 +120,7 @@ struct r600_context { unsigned ps_nsampler_view; unsigned vs_nsampler_view; unsigned nvertex_buffer; + struct radeon_state config; struct r600_context_state *rasterizer; struct r600_context_state *poly_stipple; struct r600_context_state *scissor; diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c index 43c805b982..7130bf2fa8 100644 --- a/src/gallium/drivers/r600/r600_draw.c +++ b/src/gallium/drivers/r600/r600_draw.c @@ -38,8 +38,8 @@ struct r600_draw { struct pipe_context *ctx; - struct radeon_state *draw; - struct radeon_state *vgt; + struct radeon_state draw; + struct radeon_state vgt; unsigned mode; unsigned start; unsigned count; @@ -51,7 +51,7 @@ static int r600_draw_common(struct r600_draw *draw) { struct r600_context *rctx = r600_context(draw->ctx); struct r600_screen *rscreen = rctx->screen; - struct radeon_state *vs_resource; + struct radeon_state vs_resource; struct r600_resource *rbuffer; unsigned i, j, offset, format, prim; u32 vgt_dma_index_type, vgt_draw_initiator; @@ -88,10 +88,10 @@ static int r600_draw_common(struct r600_draw *draw) r = r600_pipe_shader_update(draw->ctx, rctx->ps_shader); if (r) return r; - r = radeon_draw_set(rctx->draw, rctx->vs_shader->rstate); + r = radeon_draw_set(&rctx->draw, &rctx->vs_shader->rstate); if (r) return r; - r = radeon_draw_set(rctx->draw, rctx->ps_shader->rstate); + r = radeon_draw_set(&rctx->draw, &rctx->ps_shader->rstate); if (r) return r; @@ -103,81 +103,68 @@ static int r600_draw_common(struct r600_draw *draw) r = r600_conv_pipe_format(rctx->vertex_elements->elements[i].src_format, 
&format); if (r) return r; - vs_resource = radeon_state(rscreen->rw, R600_VS_RESOURCE_TYPE, R600_VS_RESOURCE + i); - if (vs_resource == NULL) - return -ENOMEM; - vs_resource->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); - vs_resource->nbo = 1; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = offset; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = rbuffer->bo->size - offset; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = S_038008_STRIDE(vertex_buffer->stride) | - S_038008_DATA_FORMAT(format); - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = 0x00000000; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD4] = 0x00000000; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD5] = 0x00000000; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = 0xC0000000; - vs_resource->placement[0] = RADEON_GEM_DOMAIN_GTT; - vs_resource->placement[1] = RADEON_GEM_DOMAIN_GTT; - r = radeon_draw_set_new(rctx->draw, vs_resource); - if (r) - return r; - } -#if 0 - /* setup texture sampler & resource */ - for (i = 0 ; i < rctx->ps_nsampler; i++) { - r = radeon_draw_set_new(rctx->draw, rctx->ps_sampler[i]->rstate); + r = radeon_state_init(&vs_resource, rscreen->rw, R600_VS_RESOURCE_TYPE, R600_VS_RESOURCE + i); if (r) return r; - } - for (i = 0 ; i < rctx->ps_nsampler_view; i++) { - r = radeon_draw_set_new(rctx->draw, rctx->ps_sampler_view[i]->rstate); + vs_resource.bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + vs_resource.nbo = 1; + vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD0] = offset; + vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD1] = rbuffer->bo->size - offset; + vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD2] = S_038008_STRIDE(vertex_buffer->stride) | + S_038008_DATA_FORMAT(format); + vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD3] = 0x00000000; + vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD4] = 0x00000000; + vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD5] = 0x00000000; + 
vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD6] = 0xC0000000; + vs_resource.placement[0] = RADEON_GEM_DOMAIN_GTT; + vs_resource.placement[1] = RADEON_GEM_DOMAIN_GTT; + radeon_state_pm4(&vs_resource); + r = radeon_draw_set(&rctx->draw, &vs_resource); if (r) return r; } -#endif /* FIXME start need to change winsys */ - draw->draw = radeon_state(rscreen->rw, R600_DRAW_TYPE, R600_DRAW); - if (draw->draw == NULL) - return -ENOMEM; - draw->draw->states[R600_DRAW__VGT_NUM_INDICES] = draw->count; - draw->draw->states[R600_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator; + r = radeon_state_init(&draw->draw, rscreen->rw, R600_DRAW_TYPE, R600_DRAW); + if (r) + return r; + draw->draw.states[R600_DRAW__VGT_NUM_INDICES] = draw->count; + draw->draw.states[R600_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator; if (draw->index_buffer) { - rbuffer = (struct r600_buffer*)draw->index_buffer; - draw->draw->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); - draw->draw->placement[0] = RADEON_GEM_DOMAIN_GTT; - draw->draw->placement[1] = RADEON_GEM_DOMAIN_GTT; - draw->draw->nbo = 1; + rbuffer = (struct r600_resource*)draw->index_buffer; + draw->draw.bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + draw->draw.placement[0] = RADEON_GEM_DOMAIN_GTT; + draw->draw.placement[1] = RADEON_GEM_DOMAIN_GTT; + draw->draw.nbo = 1; } - r = radeon_draw_set_new(rctx->draw, draw->draw); + radeon_state_pm4(&draw->draw); + r = radeon_draw_set(&rctx->draw, &draw->draw); + if (r) + return r; + r = radeon_state_init(&draw->vgt, rscreen->rw, R600_VGT_TYPE, R600_VGT); if (r) return r; - draw->vgt = radeon_state(rscreen->rw, R600_VGT_TYPE, R600_VGT); - if (draw->vgt == NULL) - return -ENOMEM; - draw->vgt->states[R600_VGT__VGT_PRIMITIVE_TYPE] = prim; - draw->vgt->states[R600_VGT__VGT_MAX_VTX_INDX] = 0x00FFFFFF; - draw->vgt->states[R600_VGT__VGT_MIN_VTX_INDX] = 0x00000000; - draw->vgt->states[R600_VGT__VGT_INDX_OFFSET] = draw->start; - draw->vgt->states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX] = 0x00000000; 
- draw->vgt->states[R600_VGT__VGT_DMA_INDEX_TYPE] = vgt_dma_index_type; - draw->vgt->states[R600_VGT__VGT_PRIMITIVEID_EN] = 0x00000000; - draw->vgt->states[R600_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001; - draw->vgt->states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_EN] = 0x00000000; - draw->vgt->states[R600_VGT__VGT_INSTANCE_STEP_RATE_0] = 0x00000000; - draw->vgt->states[R600_VGT__VGT_INSTANCE_STEP_RATE_1] = 0x00000000; - r = radeon_draw_set_new(rctx->draw, draw->vgt); + draw->vgt.states[R600_VGT__VGT_PRIMITIVE_TYPE] = prim; + draw->vgt.states[R600_VGT__VGT_MAX_VTX_INDX] = 0x00FFFFFF; + draw->vgt.states[R600_VGT__VGT_MIN_VTX_INDX] = 0x00000000; + draw->vgt.states[R600_VGT__VGT_INDX_OFFSET] = draw->start; + draw->vgt.states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX] = 0x00000000; + draw->vgt.states[R600_VGT__VGT_DMA_INDEX_TYPE] = vgt_dma_index_type; + draw->vgt.states[R600_VGT__VGT_PRIMITIVEID_EN] = 0x00000000; + draw->vgt.states[R600_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001; + draw->vgt.states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_EN] = 0x00000000; + draw->vgt.states[R600_VGT__VGT_INSTANCE_STEP_RATE_0] = 0x00000000; + draw->vgt.states[R600_VGT__VGT_INSTANCE_STEP_RATE_1] = 0x00000000; + radeon_state_pm4(&draw->vgt); + r = radeon_draw_set(&rctx->draw, &draw->vgt); if (r) return r; /* FIXME */ - r = radeon_ctx_set_draw_new(rctx->ctx, rctx->draw); + r = radeon_ctx_set_draw(&rctx->ctx, &rctx->draw); if (r == -EBUSY) { r600_flush(draw->ctx, 0, NULL); - r = radeon_ctx_set_draw_new(rctx->ctx, rctx->draw); + r = radeon_ctx_set_draw(&rctx->ctx, &rctx->draw); } - if (r) - return r; - rctx->draw = radeon_draw_duplicate(rctx->draw); - return 0; + return r; } void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 8da102cde0..f38aa7b463 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -130,11 +130,12 @@ static int 
r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta struct r600_shader *rshader = &rpshader->shader; struct radeon_state *state; unsigned i, tmp; + int r; - rpshader->rstate = radeon_state_decref(rpshader->rstate); - state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER); - if (state == NULL) - return -ENOMEM; + r = radeon_state_init(&rpshader->rstate, rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER); + if (r) + return r; + state = &rpshader->rstate; for (i = 0; i < 10; i++) { state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0; } @@ -145,11 +146,10 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta } state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr); - rpshader->rstate = state; - rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); - rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo); - rpshader->rstate->nbo = 2; - rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; + rpshader->rstate.bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); + rpshader->rstate.bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo); + rpshader->rstate.nbo = 2; + rpshader->rstate.placement[0] = RADEON_GEM_DOMAIN_GTT; return radeon_state_pm4(state); } @@ -159,11 +159,12 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta struct r600_shader *rshader = &rpshader->shader; struct radeon_state *state; unsigned i, tmp; + int r; - rpshader->rstate = radeon_state_decref(rpshader->rstate); - state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER); - if (state == NULL) - return -ENOMEM; + r = radeon_state_init(&rpshader->rstate, rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER); + if (r) + return r; + state = &rpshader->rstate; for (i = 0; i < rshader->ninput; i++) { tmp = S_028644_SEMANTIC(i); tmp |= 
S_028644_SEL_CENTROID(1); @@ -178,10 +179,9 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr); state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = 0x00000002; - rpshader->rstate = state; - rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); - rpshader->rstate->nbo = 1; - rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; + rpshader->rstate.bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); + rpshader->rstate.nbo = 1; + rpshader->rstate.placement[0] = RADEON_GEM_DOMAIN_GTT; return radeon_state_pm4(state); } diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index e8a591f73e..926a19cc6f 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -277,9 +277,10 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_context *rctx = r600_context(ctx); unsigned nconstant = 0, i, type, id; - struct radeon_state *rstate; + struct radeon_state rstate; struct pipe_transfer *transfer; u32 *ptr; + int r; switch (shader) { case PIPE_SHADER_VERTEX: @@ -300,16 +301,16 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, if (ptr == NULL) return; for (i = 0; i < nconstant; i++) { - rstate = radeon_state(rscreen->rw, type, id + i); - if (rstate == NULL) + r = radeon_state_init(&rstate, rscreen->rw, type, id + i); + if (r) return; - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT0_0] = ptr[i * 4 + 0]; - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT1_0] = ptr[i * 4 + 1]; - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT2_0] = ptr[i * 4 + 2]; - rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT3_0] = ptr[i * 4 + 3]; - if (radeon_state_pm4(rstate)) + rstate.states[R600_PS_CONSTANT__SQ_ALU_CONSTANT0_0] = 
ptr[i * 4 + 0]; + rstate.states[R600_PS_CONSTANT__SQ_ALU_CONSTANT1_0] = ptr[i * 4 + 1]; + rstate.states[R600_PS_CONSTANT__SQ_ALU_CONSTANT2_0] = ptr[i * 4 + 2]; + rstate.states[R600_PS_CONSTANT__SQ_ALU_CONSTANT3_0] = ptr[i * 4 + 3]; + if (radeon_state_pm4(&rstate)) return; - if (radeon_draw_set_new(rctx->draw, rstate)) + if (radeon_draw_set(&rctx->draw, &rstate)) return; } pipe_buffer_unmap(ctx, buffer, transfer); @@ -520,7 +521,6 @@ struct r600_context_state *r600_context_state_decref(struct r600_context_state * R600_ERR("invalid type %d\n", rstate->type); return NULL; } - radeon_state_decref(rstate->rstate); FREE(rstate); return NULL; } @@ -603,16 +603,17 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne return rstate; } -static struct radeon_state *r600_blend(struct r600_context *rctx) +static int r600_blend(struct r600_context *rctx, struct radeon_state *rstate) { struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate; const struct pipe_blend_state *state = &rctx->blend->state.blend; int i; + int r; + + r = radeon_state_init(rstate, rscreen->rw, R600_BLEND_TYPE, R600_BLEND); + if (r) + return r; - rstate = radeon_state(rscreen->rw, R600_BLEND_TYPE, R600_BLEND); - if (rstate == NULL) - return NULL; rstate->states[R600_BLEND__CB_BLEND_RED] = fui(rctx->blend_color.color[0]); rstate->states[R600_BLEND__CB_BLEND_GREEN] = fui(rctx->blend_color.color[1]); rstate->states[R600_BLEND__CB_BLEND_BLUE] = fui(rctx->blend_color.color[2]); @@ -656,26 +657,23 @@ static struct radeon_state *r600_blend(struct r600_context *rctx) rstate->states[R600_BLEND__CB_BLEND_CONTROL] = bc; } - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + return radeon_state_pm4(rstate); } -static struct radeon_state *r600_cb0(struct r600_context *rctx) +static int r600_cb0(struct r600_context *rctx, struct radeon_state *rstate) { struct r600_screen *rscreen = rctx->screen; struct r600_resource_texture 
*rtex; struct r600_resource *rbuffer; - struct radeon_state *rstate; const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer; unsigned level = state->cbufs[0]->level; unsigned pitch, slice; + int r; + + r = radeon_state_init(rstate, rscreen->rw, R600_CB0_TYPE, R600_CB0); + if (r) + return r; - rstate = radeon_state(rscreen->rw, R600_CB0_TYPE, R600_CB0); - if (rstate == NULL) - return NULL; rtex = (struct r600_resource_texture*)state->cbufs[0]->texture; rbuffer = &rtex->resource; rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); @@ -695,11 +693,7 @@ static struct radeon_state *r600_cb0(struct r600_context *rctx) rstate->states[R600_CB0__CB_COLOR0_FRAG] = 0x00000000; rstate->states[R600_CB0__CB_COLOR0_TILE] = 0x00000000; rstate->states[R600_CB0__CB_COLOR0_MASK] = 0x00000000; - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + return radeon_state_pm4(rstate); } static int r600_db_format(unsigned pformat, unsigned *format) @@ -718,23 +712,22 @@ static int r600_db_format(unsigned pformat, unsigned *format) } } -static struct radeon_state *r600_db(struct r600_context *rctx) +static int r600_db(struct r600_context *rctx, struct radeon_state *rstate) { struct r600_screen *rscreen = rctx->screen; struct r600_resource_texture *rtex; struct r600_resource *rbuffer; - struct radeon_state *rstate; const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer; unsigned level = state->cbufs[0]->level; unsigned pitch, slice, format; + int r; - if (state->zsbuf == NULL) - return NULL; - - rstate = radeon_state(rscreen->rw, R600_DB_TYPE, R600_DB); - if (rstate == NULL) - return NULL; + r = radeon_state_init(rstate, rscreen->rw, R600_DB_TYPE, R600_DB); + if (r) + return r; + if (state->zsbuf == NULL) + return 0; rtex = (struct r600_resource_texture*)state->zsbuf->texture; rbuffer = &rtex->resource; rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); @@ -744,8 +737,7 @@ 
static struct radeon_state *r600_db(struct r600_context *rctx) pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; if (r600_db_format(state->zsbuf->texture->format, &format)) { - radeon_state_decref(rstate); - return NULL; + return -EINVAL; } rstate->states[R600_DB__DB_DEPTH_BASE] = 0x00000000; rstate->states[R600_DB__DB_DEPTH_INFO] = 0x00010000 | @@ -754,23 +746,22 @@ static struct radeon_state *r600_db(struct r600_context *rctx) rstate->states[R600_DB__DB_PREFETCH_LIMIT] = (state->zsbuf->height / 8) -1; rstate->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice); - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + return radeon_state_pm4(rstate); } -static struct radeon_state *r600_rasterizer(struct r600_context *rctx) +static int r600_rasterizer(struct r600_context *rctx, struct radeon_state *rstate) { const struct pipe_rasterizer_state *state = &rctx->rasterizer->state.rasterizer; const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate; float offset_units = 0, offset_scale = 0; char depth = 0; unsigned offset_db_fmt_cntl = 0; + int r; + r = radeon_state_init(rstate, rscreen->rw, R600_RASTERIZER_TYPE, R600_RASTERIZER); + if (r) + return r; if (fb->zsbuf) { offset_units = state->offset_units; offset_scale = state->offset_scale * 12.0f; @@ -791,15 +782,12 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) break; default: R600_ERR("unsupported %d\n", fb->zsbuf->texture->format); - return NULL; + return -EINVAL; } } offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth); rctx->flat_shade = state->flatshade; - rstate = radeon_state(rscreen->rw, R600_RASTERIZER_TYPE, R600_RASTERIZER); - if (rstate == NULL) - return NULL; 
rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000001; rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0x00000000; rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = 0x00080000 | @@ -827,25 +815,21 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET] = fui(offset_units); rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE] = fui(offset_scale); rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET] = fui(offset_units); - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + return radeon_state_pm4(rstate); } -static struct radeon_state *r600_scissor(struct r600_context *rctx) +static int r600_scissor(struct r600_context *rctx, struct radeon_state *rstate) { const struct pipe_scissor_state *state = &rctx->scissor->state.scissor; struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate; u32 tl, br; + int r; + r = radeon_state_init(rstate, rscreen->rw, R600_SCISSOR_TYPE, R600_SCISSOR); + if (r) + return r; tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) | S_028240_WINDOW_OFFSET_DISABLE(1); br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); - rstate = radeon_state(rscreen->rw, R600_SCISSOR_TYPE, R600_SCISSOR); - if (rstate == NULL) - return NULL; rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = tl; rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = br; rstate->states[R600_SCISSOR__PA_SC_WINDOW_OFFSET] = 0x00000000; @@ -865,22 +849,18 @@ static struct radeon_state *r600_scissor(struct r600_context *rctx) rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = br; rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = tl; rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = br; - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + return radeon_state_pm4(rstate); } -static struct radeon_state 
*r600_viewport(struct r600_context *rctx) +static int r600_viewport(struct r600_context *rctx, struct radeon_state *rstate) { const struct pipe_viewport_state *state = &rctx->viewport->state.viewport; struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate; + int r; - rstate = radeon_state(rscreen->rw, R600_VIEWPORT_TYPE, R600_VIEWPORT); - if (rstate == NULL) - return NULL; + r = radeon_state_init(rstate, rscreen->rw, R600_VIEWPORT_TYPE, R600_VIEWPORT); + if (r) + return r; rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMIN_0] = 0x00000000; rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui(state->scale[0]); @@ -890,33 +870,29 @@ static struct radeon_state *r600_viewport(struct r600_context *rctx) rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui(state->translate[1]); rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = fui(state->translate[2]); rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + return radeon_state_pm4(rstate); } -static struct radeon_state *r600_dsa(struct r600_context *rctx) +static int r600_dsa(struct r600_context *rctx, struct radeon_state *rstate) { const struct pipe_depth_stencil_alpha_state *state = &rctx->dsa->state.dsa; const struct pipe_stencil_ref *stencil_ref = &rctx->stencil_ref->state.stencil_ref; struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate; unsigned db_depth_control, alpha_test_control, alpha_ref; unsigned stencil_ref_mask, stencil_ref_mask_bf; - - rstate = radeon_state(rscreen->rw, R600_DSA_TYPE, R600_DSA); - if (rstate == NULL) - return NULL; + int r; + + r = radeon_state_init(rstate, rscreen->rw, R600_DSA_TYPE, R600_DSA); + if (r) + return r; stencil_ref_mask = 0; stencil_ref_mask_bf = 0; db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | S_028800_Z_WRITE_ENABLE(state->depth.writemask) | 
S_028800_ZFUNC(state->depth.func); - /* set stencil enable */ + /* set stencil enable */ if (state->stencil[0].enabled) { db_depth_control |= S_028800_STENCIL_ENABLE(1); db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)); @@ -963,11 +939,7 @@ static struct radeon_state *r600_dsa(struct r600_context *rctx) rstate->states[R600_DSA__DB_SRESULTS_COMPARE_STATE1] = 0x00000000; rstate->states[R600_DSA__DB_PRELOAD_CONTROL] = 0x00000000; rstate->states[R600_DSA__DB_ALPHA_TO_MASK] = 0x0000AA00; - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + return radeon_state_pm4(rstate); } static inline unsigned r600_tex_wrap(unsigned wrap) @@ -1045,16 +1017,15 @@ static INLINE u32 S_FIXED(float value, u32 frac_bits) return value * (1 << frac_bits); } -static struct radeon_state *r600_sampler(struct r600_context *rctx, - const struct pipe_sampler_state *state, - unsigned id) +static int r600_sampler(struct r600_context *rctx, struct radeon_state *rstate, + const struct pipe_sampler_state *state, unsigned id) { struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate; + int r; - rstate = radeon_state(rscreen->rw, R600_PS_SAMPLER_TYPE, id); - if (rstate == NULL) - return NULL; + r = radeon_state_init(rstate, rscreen->rw, R600_PS_SAMPLER_TYPE, id); + if (r) + return r; rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0] = S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | @@ -1069,11 +1040,7 @@ static struct radeon_state *r600_sampler(struct r600_context *rctx, S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)); rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0] = S_03C008_TYPE(1); - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + return radeon_state_pm4(rstate); } static inline unsigned 
r600_tex_swizzle(unsigned swizzle) @@ -1123,27 +1090,25 @@ static inline unsigned r600_tex_dim(unsigned dim) } } -static struct radeon_state *r600_resource(struct r600_context *rctx, - const struct pipe_sampler_view *view, - unsigned id) +static int r600_resource(struct r600_context *rctx, struct radeon_state *rstate, + const struct pipe_sampler_view *view, unsigned id) { struct r600_screen *rscreen = rctx->screen; const struct util_format_description *desc; struct r600_resource_texture *tmp; struct r600_resource *rbuffer; - struct radeon_state *rstate; unsigned format; + int r; + r = radeon_state_init(rstate, rscreen->rw, R600_PS_RESOURCE_TYPE, id); + if (r) + return r; if (r600_conv_pipe_format(view->texture->format, &format)) - return NULL; + return -EINVAL; desc = util_format_description(view->texture->format); if (desc == NULL) { R600_ERR("unknow format %d\n", view->texture->format); - return NULL; - } - rstate = radeon_state(rscreen->rw, R600_PS_RESOURCE_TYPE, id); - if (rstate == NULL) { - return NULL; + return -EINVAL; } tmp = (struct r600_resource_texture*)view->texture; rbuffer = &tmp->resource; @@ -1185,20 +1150,15 @@ static struct radeon_state *r600_resource(struct r600_context *rctx, S_038014_LAST_ARRAY(0); rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE); - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + return radeon_state_pm4(rstate); } -static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) +static int r600_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate) { struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate; const struct pipe_blend_state *pbs = &rctx->blend->state.blend; uint32_t color_control, target_mask; - int i; + int i, r; target_mask = 0; color_control = S_028808_PER_MRT_BLEND(1); @@ -1215,7 +1175,9 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) target_mask |= 
(pbs->rt[0].colormask << (4 * i)); } } - rstate = radeon_state(rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); + r = radeon_state_init(rstate, rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); + if (r) + return r; rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = 0x0000000F; rstate->states[R600_CB_CNTL__CB_TARGET_MASK] = target_mask; rstate->states[R600_CB_CNTL__CB_COLOR_CONTROL] = color_control; @@ -1227,11 +1189,7 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) rstate->states[R600_CB_CNTL__CB_CLRCMP_DST] = 0x000000FF; rstate->states[R600_CB_CNTL__CB_CLRCMP_MSK] = 0xFFFFFFFF; rstate->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF; - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + return radeon_state_pm4(rstate); } int r600_context_hw_states(struct r600_context *rctx) @@ -1242,91 +1200,74 @@ int r600_context_hw_states(struct r600_context *rctx) /* free previous TODO determine what need to be updated, what * doesn't */ - //radeon_state_decref(rctx->hw_states.config); - radeon_state_decref(rctx->hw_states.cb_cntl); - radeon_state_decref(rctx->hw_states.db); - radeon_state_decref(rctx->hw_states.rasterizer); - radeon_state_decref(rctx->hw_states.scissor); - radeon_state_decref(rctx->hw_states.dsa); - radeon_state_decref(rctx->hw_states.blend); - radeon_state_decref(rctx->hw_states.viewport); - radeon_state_decref(rctx->hw_states.cb0); - for (i = 0; i < rctx->hw_states.ps_nresource; i++) { - radeon_state_decref(rctx->hw_states.ps_resource[i]); - rctx->hw_states.ps_resource[i] = NULL; - } - rctx->hw_states.ps_nresource = 0; - for (i = 0; i < rctx->hw_states.ps_nsampler; i++) { - radeon_state_decref(rctx->hw_states.ps_sampler[i]); - rctx->hw_states.ps_sampler[i] = NULL; - } - rctx->hw_states.ps_nsampler = 0; + memset(&rctx->hw_states, 0, sizeof(struct r600_context_hw_states)); /* build new states */ - rctx->hw_states.rasterizer = r600_rasterizer(rctx); - rctx->hw_states.scissor = r600_scissor(rctx); - 
rctx->hw_states.dsa = r600_dsa(rctx); - rctx->hw_states.blend = r600_blend(rctx); - rctx->hw_states.viewport = r600_viewport(rctx); - rctx->hw_states.cb0 = r600_cb0(rctx); - rctx->hw_states.db = r600_db(rctx); - rctx->hw_states.cb_cntl = r600_cb_cntl(rctx); + rctx->hw_states.config = rctx->config; + r600_rasterizer(rctx, &rctx->hw_states.rasterizer); + r600_scissor(rctx, &rctx->hw_states.scissor); + r600_dsa(rctx, &rctx->hw_states.dsa); + r600_blend(rctx, &rctx->hw_states.blend); + r600_viewport(rctx, &rctx->hw_states.viewport); + r600_cb0(rctx, &rctx->hw_states.cb0); + r600_db(rctx, &rctx->hw_states.db); + r600_cb_cntl(rctx, &rctx->hw_states.cb_cntl); for (i = 0; i < rctx->ps_nsampler; i++) { if (rctx->ps_sampler[i]) { - rctx->hw_states.ps_sampler[i] = r600_sampler(rctx, - &rctx->ps_sampler[i]->state.sampler, - R600_PS_SAMPLER + i); + r600_sampler(rctx, &rctx->hw_states.ps_sampler[i], + &rctx->ps_sampler[i]->state.sampler, + R600_PS_SAMPLER + i); } } rctx->hw_states.ps_nsampler = rctx->ps_nsampler; for (i = 0; i < rctx->ps_nsampler_view; i++) { if (rctx->ps_sampler_view[i]) { - rctx->hw_states.ps_resource[i] = r600_resource(rctx, - &rctx->ps_sampler_view[i]->state.sampler_view, - R600_PS_RESOURCE + i); + r600_resource(rctx, &rctx->hw_states.ps_resource[i], + &rctx->ps_sampler_view[i]->state.sampler_view, + R600_PS_RESOURCE + i); } } rctx->hw_states.ps_nresource = rctx->ps_nsampler_view; /* bind states */ - r = radeon_draw_set(rctx->draw, rctx->hw_states.db); + r = radeon_draw_set(&rctx->draw, &rctx->hw_states.db); if (r) return r; - r = radeon_draw_set(rctx->draw, rctx->hw_states.rasterizer); + r = radeon_draw_set(&rctx->draw, &rctx->hw_states.rasterizer); if (r) return r; - r = radeon_draw_set(rctx->draw, rctx->hw_states.scissor); + r = radeon_draw_set(&rctx->draw, &rctx->hw_states.scissor); if (r) return r; - r = radeon_draw_set(rctx->draw, rctx->hw_states.dsa); + r = radeon_draw_set(&rctx->draw, &rctx->hw_states.dsa); if (r) return r; - r = 
radeon_draw_set(rctx->draw, rctx->hw_states.blend); + r = radeon_draw_set(&rctx->draw, &rctx->hw_states.blend); if (r) return r; - r = radeon_draw_set(rctx->draw, rctx->hw_states.viewport); + r = radeon_draw_set(&rctx->draw, &rctx->hw_states.viewport); if (r) return r; - r = radeon_draw_set(rctx->draw, rctx->hw_states.cb0); + r = radeon_draw_set(&rctx->draw, &rctx->hw_states.cb0); if (r) return r; - r = radeon_draw_set(rctx->draw, rctx->hw_states.config); + r = radeon_draw_set(&rctx->draw, &rctx->hw_states.config); if (r) return r; - r = radeon_draw_set(rctx->draw, rctx->hw_states.cb_cntl); + r = radeon_draw_set(&rctx->draw, &rctx->hw_states.cb_cntl); if (r) return r; for (i = 0; i < rctx->hw_states.ps_nresource; i++) { - if (rctx->hw_states.ps_resource[i]) { - r = radeon_draw_set(rctx->draw, rctx->hw_states.ps_resource[i]); + if (rctx->hw_states.ps_resource[i].valid) { + r = radeon_draw_set(&rctx->draw, &rctx->hw_states.ps_resource[i]); if (r) return r; } } for (i = 0; i < rctx->hw_states.ps_nsampler; i++) { - if (rctx->hw_states.ps_sampler[i]) { - r = radeon_draw_set(rctx->draw, rctx->hw_states.ps_sampler[i]); + if (rctx->hw_states.ps_sampler[i].valid) { + r = radeon_draw_set(&rctx->draw, &rctx->hw_states.ps_sampler[i]); if (r) return r; } diff --git a/src/gallium/drivers/r600/radeon.h b/src/gallium/drivers/r600/radeon.h index 3a8405f9b4..709ef8a85a 100644 --- a/src/gallium/drivers/r600/radeon.h +++ b/src/gallium/drivers/r600/radeon.h @@ -103,17 +103,17 @@ int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo); */ struct radeon_state { struct radeon *radeon; - unsigned refcount; + unsigned valid; unsigned type; unsigned id; unsigned nstates; - u32 *states; + u32 states[64]; unsigned npm4; unsigned cpm4; u32 pm4_crc; - u32 *pm4; + u32 pm4[128]; u32 nimmd; - u32 *immd; + u32 immd[64]; unsigned nbo; struct radeon_bo *bo[4]; unsigned nreloc; @@ -123,35 +123,51 @@ struct radeon_state { unsigned bo_dirty[4]; }; -struct radeon_state *radeon_state(struct radeon 
*radeon, u32 type, u32 id); -struct radeon_state *radeon_state_incref(struct radeon_state *state); -struct radeon_state *radeon_state_decref(struct radeon_state *state); +int radeon_state_init(struct radeon_state *state, struct radeon *radeon, u32 type, u32 id); int radeon_state_pm4(struct radeon_state *state); /* * draw functions */ struct radeon_draw { - unsigned refcount; struct radeon *radeon; unsigned nstate; - struct radeon_state **state; + struct radeon_state state[1273]; unsigned cpm4; }; -struct radeon_draw *radeon_draw(struct radeon *radeon); -struct radeon_draw *radeon_draw_duplicate(struct radeon_draw *draw); -struct radeon_draw *radeon_draw_incref(struct radeon_draw *draw); -struct radeon_draw *radeon_draw_decref(struct radeon_draw *draw); +int radeon_draw_init(struct radeon_draw *draw, struct radeon *radeon); int radeon_draw_set(struct radeon_draw *draw, struct radeon_state *state); -int radeon_draw_set_new(struct radeon_draw *draw, struct radeon_state *state); int radeon_draw_check(struct radeon_draw *draw); -struct radeon_ctx *radeon_ctx(struct radeon *radeon); -struct radeon_ctx *radeon_ctx_decref(struct radeon_ctx *ctx); -struct radeon_ctx *radeon_ctx_incref(struct radeon_ctx *ctx); +/* + * Context + */ +#pragma pack(1) +struct radeon_cs_reloc { + uint32_t handle; + uint32_t read_domain; + uint32_t write_domain; + uint32_t flags; +}; +#pragma pack() + +struct radeon_ctx { + struct radeon *radeon; + u32 *pm4; + u32 cpm4; + u32 draw_cpm4; + unsigned id; + unsigned nreloc; + struct radeon_cs_reloc reloc[2048]; + unsigned nbo; + struct radeon_bo *bo[2048]; + unsigned ndraw; + struct radeon_draw draw[128]; +}; + +int radeon_ctx_init(struct radeon_ctx *ctx, struct radeon *radeon); int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw); -int radeon_ctx_set_draw_new(struct radeon_ctx *ctx, struct radeon_draw *draw); int radeon_ctx_pm4(struct radeon_ctx *ctx); int radeon_ctx_submit(struct radeon_ctx *ctx); void radeon_ctx_dump_bof(struct 
radeon_ctx *ctx, const char *file); diff --git a/src/gallium/targets/dri-r600/Makefile b/src/gallium/targets/dri-r600/Makefile index 932303d194..9c8b4ab252 100644 --- a/src/gallium/targets/dri-r600/Makefile +++ b/src/gallium/targets/dri-r600/Makefile @@ -4,12 +4,12 @@ include $(TOP)/configs/current LIBNAME = r600_dri.so PIPE_DRIVERS = \ + $(TOP)/src/gallium/drivers/r600/libr600.a \ $(TOP)/src/gallium/state_trackers/dri/drm/libdridrm.a \ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ - $(TOP)/src/gallium/drivers/rbug/librbug.a \ - $(TOP)/src/gallium/drivers/r600/libr600.a + $(TOP)/src/gallium/drivers/rbug/librbug.a C_SOURCES = \ target.c \ diff --git a/src/gallium/winsys/r600/drm/radeon.c b/src/gallium/winsys/r600/drm/radeon.c index 7e65669806..24d821d5cf 100644 --- a/src/gallium/winsys/r600/drm/radeon.c +++ b/src/gallium/winsys/r600/drm/radeon.c @@ -43,16 +43,6 @@ static int radeon_get_device(struct radeon *radeon) return r; } -/* symbol missing drove me crazy hack to get symbol exported */ -static void fake(void) -{ - struct radeon_ctx *ctx; - struct radeon_draw *draw; - - ctx = radeon_ctx(NULL); - draw = radeon_draw(NULL); -} - struct radeon *radeon_new(int fd, unsigned device) { struct radeon *radeon; @@ -60,7 +50,6 @@ struct radeon *radeon_new(int fd, unsigned device) radeon = calloc(1, sizeof(*radeon)); if (radeon == NULL) { - fake(); return NULL; } radeon->fd = fd; diff --git a/src/gallium/winsys/r600/drm/radeon_ctx.c b/src/gallium/winsys/r600/drm/radeon_ctx.c index 6b0eba0b28..af270d5d20 100644 --- a/src/gallium/winsys/r600/drm/radeon_ctx.c +++ b/src/gallium/winsys/r600/drm/radeon_ctx.c @@ -32,13 +32,8 @@ int radeon_ctx_set_bo_new(struct radeon_ctx *ctx, struct radeon_bo *bo) { - void *ptr; - - ptr = realloc(ctx->bo, sizeof(struct radeon_bo) * (ctx->nbo + 1)); - if (ptr == NULL) { - return -ENOMEM; - } - ctx->bo = ptr; + if (ctx->nbo >= 2048) + return 
-EBUSY; ctx->bo[ctx->nbo] = bo; ctx->nbo++; return 0; @@ -76,49 +71,26 @@ void radeon_ctx_get_placement(struct radeon_ctx *ctx, unsigned reloc, u32 *place } } -struct radeon_ctx *radeon_ctx(struct radeon *radeon) -{ - struct radeon_ctx *ctx; - - if (radeon == NULL) - return NULL; - ctx = calloc(1, sizeof(*ctx)); - if (ctx == NULL) - return NULL; - ctx->radeon = radeon_incref(radeon); - return ctx; -} - -struct radeon_ctx *radeon_ctx_incref(struct radeon_ctx *ctx) +static void radeon_ctx_clear(struct radeon_ctx *ctx) { - ctx->refcount++; - return ctx; + ctx->draw_cpm4 = 0; + ctx->cpm4 = 0; + ctx->ndraw = 0; + ctx->nbo = 0; + ctx->nreloc = 0; } -struct radeon_ctx *radeon_ctx_decref(struct radeon_ctx *ctx) +int radeon_ctx_init(struct radeon_ctx *ctx, struct radeon *radeon) { - unsigned i; - - if (ctx == NULL) - return NULL; - if (--ctx->refcount > 0) { - return NULL; - } - - for (i = 0; i < ctx->ndraw; i++) { - ctx->draw[i] = radeon_draw_decref(ctx->draw[i]); - } - for (i = 0; i < ctx->nbo; i++) { - ctx->bo[i] = radeon_bo_decref(ctx->radeon, ctx->bo[i]); - } - ctx->radeon = radeon_decref(ctx->radeon); - free(ctx->draw); - free(ctx->bo); + memset(ctx, 0, sizeof(struct radeon_ctx)); + ctx->radeon = radeon_incref(radeon); + radeon_ctx_clear(ctx); free(ctx->pm4); - free(ctx->reloc); - memset(ctx, 0, sizeof(*ctx)); - free(ctx); - return NULL; + ctx->cpm4 = 0; + ctx->pm4 = malloc(64 * 1024); + if (ctx->pm4 == NULL) + return -ENOMEM; + return 0; } static int radeon_ctx_state_bo(struct radeon_ctx *ctx, struct radeon_state *state) @@ -143,7 +115,6 @@ static int radeon_ctx_state_bo(struct radeon_ctx *ctx, struct radeon_state *stat return 0; } - int radeon_ctx_submit(struct radeon_ctx *ctx) { struct drm_radeon_cs drmib; @@ -170,6 +141,7 @@ int radeon_ctx_submit(struct radeon_ctx *ctx) r = drmCommandWriteRead(ctx->radeon->fd, DRM_RADEON_CS, &drmib, sizeof(struct drm_radeon_cs)); #endif + radeon_ctx_clear(ctx); return r; } @@ -177,7 +149,6 @@ static int radeon_ctx_reloc(struct 
radeon_ctx *ctx, struct radeon_bo *bo, unsigned id, unsigned *placement) { unsigned i; - struct radeon_cs_reloc *ptr; for (i = 0; i < ctx->nreloc; i++) { if (ctx->reloc[i].handle == bo->handle) { @@ -185,14 +156,12 @@ static int radeon_ctx_reloc(struct radeon_ctx *ctx, struct radeon_bo *bo, return 0; } } - ptr = realloc(ctx->reloc, sizeof(struct radeon_cs_reloc) * (ctx->nreloc + 1)); - if (ptr == NULL) - return -ENOMEM; - ctx->reloc = ptr; - ptr[ctx->nreloc].handle = bo->handle; - ptr[ctx->nreloc].read_domain = placement[0] | placement [1]; - ptr[ctx->nreloc].write_domain = placement[0] | placement [1]; - ptr[ctx->nreloc].flags = 0; + if (ctx->nreloc >= 2048) + return -EINVAL; + ctx->reloc[ctx->nreloc].handle = bo->handle; + ctx->reloc[ctx->nreloc].read_domain = placement[0] | placement [1]; + ctx->reloc[ctx->nreloc].write_domain = placement[0] | placement [1]; + ctx->reloc[ctx->nreloc].flags = 0; ctx->pm4[id] = ctx->nreloc * sizeof(struct radeon_cs_reloc) / 4; ctx->nreloc++; return 0; @@ -221,21 +190,13 @@ static int radeon_ctx_state_schedule(struct radeon_ctx *ctx, struct radeon_state return 0; } -int radeon_ctx_set_draw_new(struct radeon_ctx *ctx, struct radeon_draw *draw) +int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw) { - struct radeon_draw *pdraw = NULL; - struct radeon_draw **ndraw; - struct radeon_state *nstate, *ostate; - unsigned cpm4, i, cstate; - void *tmp; + unsigned cpm4, i; int r = 0; - ndraw = realloc(ctx->draw, sizeof(void*) * (ctx->ndraw + 1)); - if (ndraw == NULL) - return -ENOMEM; - ctx->draw = ndraw; for (i = 0; i < draw->nstate; i++) { - r = radeon_ctx_state_bo(ctx, draw->state[i]); + r = radeon_ctx_state_bo(ctx, &draw->state[i]); if (r) return r; } @@ -247,69 +208,48 @@ int radeon_ctx_set_draw_new(struct radeon_ctx *ctx, struct radeon_draw *draw) __func__, draw->cpm4, RADEON_CTX_MAX_PM4); return -EINVAL; } - tmp = realloc(ctx->state, (ctx->nstate + draw->nstate) * sizeof(void*)); - if (tmp == NULL) - return -ENOMEM; 
- ctx->state = tmp; - pdraw = ctx->cdraw; - for (i = 0, cpm4 = 0, cstate = ctx->nstate; i < draw->nstate - 1; i++) { - nstate = draw->state[i]; - if (nstate) { - if (pdraw && pdraw->state[i]) { - ostate = pdraw->state[i]; - if (ostate->pm4_crc != nstate->pm4_crc) { - ctx->state[cstate++] = nstate; - cpm4 += nstate->cpm4; - } - } else { - ctx->state[cstate++] = nstate; - cpm4 += nstate->cpm4; + ctx->draw[ctx->ndraw] = *draw; + for (i = 0, cpm4 = 0; i < draw->nstate - 1; i++) { + ctx->draw[ctx->ndraw].state[i].valid &= ~2; + if (ctx->draw[ctx->ndraw].state[i].valid) { + if (ctx->ndraw > 1 && ctx->draw[ctx->ndraw - 1].state[i].valid) { + if (ctx->draw[ctx->ndraw - 1].state[i].pm4_crc == draw->state[i].pm4_crc) + continue; } + ctx->draw[ctx->ndraw].state[i].valid |= 2; + cpm4 += ctx->draw[ctx->ndraw].state[i].cpm4; } } /* The last state is the draw state always add it */ - if (draw->state[i] == NULL) { + if (!draw->state[i].valid) { fprintf(stderr, "%s no draw command\n", __func__); return -EINVAL; } - ctx->state[cstate++] = draw->state[i]; - cpm4 += draw->state[i]->cpm4; + ctx->draw[ctx->ndraw].state[i].valid |= 2; + cpm4 += ctx->draw[ctx->ndraw].state[i].cpm4; if ((ctx->draw_cpm4 + cpm4) > RADEON_CTX_MAX_PM4) { /* need to flush */ return -EBUSY; } ctx->draw_cpm4 += cpm4; - ctx->nstate = cstate; - ctx->draw[ctx->ndraw++] = draw; - ctx->cdraw = draw; + ctx->ndraw++; return 0; } -int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw) -{ - int r; - - radeon_draw_incref(draw); - r = radeon_ctx_set_draw_new(ctx, draw); - if (r) - radeon_draw_decref(draw); - return r; -} - int radeon_ctx_pm4(struct radeon_ctx *ctx) { - unsigned i; + unsigned i, j, c; int r; - free(ctx->pm4); - ctx->cpm4 = 0; - ctx->pm4 = malloc(ctx->draw_cpm4 * 4); - if (ctx->pm4 == NULL) - return -EINVAL; - for (i = 0, ctx->id = 0; i < ctx->nstate; i++) { - r = radeon_ctx_state_schedule(ctx, ctx->state[i]); - if (r) - return r; + for (i = 0, c = 0, ctx->id = 0; i < ctx->ndraw; i++) { + 
for (j = 0; j < ctx->draw[i].nstate; j++) { + if (ctx->draw[i].state[j].valid & 2) { + r = radeon_ctx_state_schedule(ctx, &ctx->draw[i].state[j]); + if (r) + return r; + c += ctx->draw[i].state[j].cpm4; + } + } } if (ctx->id != ctx->draw_cpm4) { fprintf(stderr, "%s miss predicted pm4 size %d for %d\n", diff --git a/src/gallium/winsys/r600/drm/radeon_draw.c b/src/gallium/winsys/r600/drm/radeon_draw.c index 4413ed79fb..53699eb0b1 100644 --- a/src/gallium/winsys/r600/drm/radeon_draw.c +++ b/src/gallium/winsys/r600/drm/radeon_draw.c @@ -31,111 +31,33 @@ /* * draw functions */ -struct radeon_draw *radeon_draw(struct radeon *radeon) +int radeon_draw_init(struct radeon_draw *draw, struct radeon *radeon) { - struct radeon_draw *draw; - - draw = calloc(1, sizeof(*draw)); - if (draw == NULL) - return NULL; + memset(draw, 0, sizeof(struct radeon_draw)); draw->nstate = radeon->nstate; draw->radeon = radeon; - draw->refcount = 1; - draw->state = calloc(1, sizeof(void*) * draw->nstate); - if (draw->state == NULL) { - free(draw); - return NULL; - } - return draw; -} - -struct radeon_draw *radeon_draw_incref(struct radeon_draw *draw) -{ - draw->refcount++; - return draw; -} - -struct radeon_draw *radeon_draw_decref(struct radeon_draw *draw) -{ - unsigned i; - - if (draw == NULL) - return NULL; - if (--draw->refcount > 0) - return NULL; - for (i = 0; i < draw->nstate; i++) { - draw->state[i] = radeon_state_decref(draw->state[i]); - } - free(draw->state); - memset(draw, 0, sizeof(*draw)); - free(draw); - return NULL; + return 0; } -int radeon_draw_set_new(struct radeon_draw *draw, struct radeon_state *state) +int radeon_draw_set(struct radeon_draw *draw, struct radeon_state *state) { if (state == NULL) return 0; if (state->type >= draw->radeon->ntype) return -EINVAL; - draw->state[state->id] = radeon_state_decref(draw->state[state->id]); - draw->state[state->id] = state; + draw->state[state->id] = *state; return 0; } -int radeon_draw_set(struct radeon_draw *draw, struct radeon_state 
*state) -{ - if (state == NULL) - return 0; - radeon_state_incref(state); - return radeon_draw_set_new(draw, state); -} - int radeon_draw_check(struct radeon_draw *draw) { unsigned i; int r; - r = radeon_draw_pm4(draw); - if (r) - return r; for (i = 0, draw->cpm4 = 0; i < draw->nstate; i++) { - if (draw->state[i]) { - draw->cpm4 += draw->state[i]->cpm4; + if (draw->state[i].valid) { + draw->cpm4 += draw->state[i].cpm4; } } return 0; } - -struct radeon_draw *radeon_draw_duplicate(struct radeon_draw *draw) -{ - struct radeon_draw *ndraw; - unsigned i; - - if (draw == NULL) - return NULL; - ndraw = radeon_draw(draw->radeon); - if (ndraw == NULL) { - return NULL; - } - for (i = 0; i < draw->nstate; i++) { - if (radeon_draw_set(ndraw, draw->state[i])) { - radeon_draw_decref(ndraw); - return NULL; - } - } - return ndraw; -} - -int radeon_draw_pm4(struct radeon_draw *draw) -{ - unsigned i; - int r; - - for (i = 0; i < draw->nstate; i++) { - r = radeon_state_pm4(draw->state[i]); - if (r) - return r; - } - return 0; -} diff --git a/src/gallium/winsys/r600/drm/radeon_priv.h b/src/gallium/winsys/r600/drm/radeon_priv.h index b91421f438..80392cda96 100644 --- a/src/gallium/winsys/r600/drm/radeon_priv.h +++ b/src/gallium/winsys/r600/drm/radeon_priv.h @@ -24,7 +24,6 @@ #include "radeon.h" struct radeon; -struct radeon_ctx; /* * radeon functions @@ -71,34 +70,6 @@ extern unsigned radeon_type_from_id(struct radeon *radeon, unsigned id); /* * radeon context functions */ -#pragma pack(1) -struct radeon_cs_reloc { - uint32_t handle; - uint32_t read_domain; - uint32_t write_domain; - uint32_t flags; -}; -#pragma pack() - -struct radeon_ctx { - int refcount; - struct radeon *radeon; - u32 *pm4; - u32 cpm4; - u32 draw_cpm4; - unsigned id; - unsigned next_id; - unsigned nreloc; - struct radeon_cs_reloc *reloc; - unsigned nbo; - struct radeon_bo **bo; - unsigned ndraw; - struct radeon_draw *cdraw; - struct radeon_draw **draw; - unsigned nstate; - struct radeon_state **state; -}; - int 
radeon_ctx_set_bo_new(struct radeon_ctx *ctx, struct radeon_bo *bo); struct radeon_bo *radeon_ctx_get_bo(struct radeon_ctx *ctx, unsigned reloc); void radeon_ctx_get_placement(struct radeon_ctx *ctx, unsigned reloc, u32 *placement); diff --git a/src/gallium/winsys/r600/drm/radeon_state.c b/src/gallium/winsys/r600/drm/radeon_state.c index 308288557a..d7cd1d7a94 100644 --- a/src/gallium/winsys/r600/drm/radeon_state.c +++ b/src/gallium/winsys/r600/drm/radeon_state.c @@ -32,82 +32,23 @@ /* * state core functions */ -struct radeon_state *radeon_state(struct radeon *radeon, u32 type, u32 id) +int radeon_state_init(struct radeon_state *state, struct radeon *radeon, u32 type, u32 id) { - struct radeon_state *state; - if (type > radeon->ntype) { fprintf(stderr, "%s invalid type %d\n", __func__, type); - return NULL; + return -EINVAL; } if (id > radeon->nstate) { fprintf(stderr, "%s invalid state id %d\n", __func__, id); - return NULL; + return -EINVAL; } - state = calloc(1, sizeof(*state)); - if (state == NULL) - return NULL; + memset(state, 0, sizeof(struct radeon_state)); state->radeon = radeon; state->type = type; state->id = id; - state->refcount = 1; state->npm4 = radeon->type[type].npm4; state->nstates = radeon->type[type].nstates; - state->states = calloc(1, state->nstates * 4); - state->pm4 = calloc(1, radeon->type[type].npm4 * 4); - if (state->states == NULL || state->pm4 == NULL) { - radeon_state_decref(state); - return NULL; - } - return state; -} - -struct radeon_state *radeon_state_duplicate(struct radeon_state *state) -{ - struct radeon_state *nstate = radeon_state(state->radeon, state->type, state->id); - unsigned i; - - if (state == NULL) - return NULL; - nstate->cpm4 = state->cpm4; - nstate->nbo = state->nbo; - nstate->nreloc = state->nreloc; - memcpy(nstate->states, state->states, state->nstates * 4); - memcpy(nstate->pm4, state->pm4, state->npm4 * 4); - memcpy(nstate->placement, state->placement, 8 * 4); - memcpy(nstate->reloc_pm4_id, state->reloc_pm4_id, 
8 * 4); - memcpy(nstate->reloc_bo_id, state->reloc_bo_id, 8 * 4); - memcpy(nstate->bo_dirty, state->bo_dirty, 4 * 4); - for (i = 0; i < state->nbo; i++) { - nstate->bo[i] = radeon_bo_incref(state->radeon, state->bo[i]); - } - return nstate; -} - -struct radeon_state *radeon_state_incref(struct radeon_state *state) -{ - state->refcount++; - return state; -} - -struct radeon_state *radeon_state_decref(struct radeon_state *state) -{ - unsigned i; - - if (state == NULL) - return NULL; - if (--state->refcount > 0) { - return NULL; - } - for (i = 0; i < state->nbo; i++) { - state->bo[i] = radeon_bo_decref(state->radeon, state->bo[i]); - } - free(state->immd); - free(state->states); - free(state->pm4); - memset(state, 0, sizeof(*state)); - free(state); - return NULL; + return 0; } int radeon_state_replace_always(struct radeon_state *ostate, @@ -156,6 +97,7 @@ int radeon_state_pm4(struct radeon_state *state) return r; } state->pm4_crc = crc32(state->pm4, state->cpm4 * 4); + state->valid = 1; return 0; } -- cgit v1.2.3 From 3e231361f3291307bfaa27f1bbea13fecc3b2072 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Thu, 5 Aug 2010 13:07:41 +0200 Subject: nvfx: shut up unknown cap 64 warning --- src/gallium/drivers/nvfx/nvfx_screen.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index 80db28a07c..f2525ccb38 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -131,6 +131,8 @@ nvfx_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return screen->is_nv4x ? 
1 : 0; case PIPE_CAP_GEOMETRY_SHADER4: return 0; + case PIPE_CAP_DEPTH_CLAMP: + return 0; // TODO: implement depth clamp default: NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); return 0; -- cgit v1.2.3 From bf5ee5aa4f065b6e17a4cc9cd2e539c4290ff98c Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Fri, 6 Aug 2010 00:58:48 +0200 Subject: nvfx: fix nv30 vertex program scalar opcodes Apparently they have always been broken, even before unification. Fixes a lot of stuff, starting from morph3d and lighting in teapot with textures disabled. --- src/gallium/drivers/nvfx/nvfx_vertprog.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c index 80b98b62d3..24d9846310 100644 --- a/src/gallium/drivers/nvfx/nvfx_vertprog.c +++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c @@ -299,7 +299,13 @@ nvfx_vp_arith(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, int slot, int op, (3 << NVFX_VP(INST_COND_SWZ_W_SHIFT))); if(!nvfx->is_nv4x) { - hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); + if(slot == 0) + hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); + else + { + hw[0] |= ((op >> 4) << NV30_VP_INST_SCA_OPCODEH_SHIFT); + hw[1] |= ((op & 0xf) << NV30_VP_INST_SCA_OPCODEL_SHIFT); + } // hw[3] |= NVFX_VP(INST_SCA_DEST_TEMP_MASK); // hw[3] |= (mask << NVFX_VP(INST_VEC_WRITEMASK_SHIFT)); -- cgit v1.2.3 From 4bd061b127aedfa7f6cd2c9fb4763927588c7ad1 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Thu, 5 Aug 2010 17:11:46 -0700 Subject: gallivm: Only get debug option once --- src/gallium/auxiliary/gallivm/lp_bld_init.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 69353dea09..ef0888079c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -45,6 +45,8 @@ static 
const struct debug_named_value lp_bld_debug_flags[] = { { "nopt", GALLIVM_DEBUG_NO_OPT, NULL }, DEBUG_NAMED_VALUE_END }; + +DEBUG_GET_ONCE_FLAGS_OPTION(gallivm_debug, "GALLIVM_DEBUG", lp_bld_debug_flags, 0); #endif @@ -89,7 +91,7 @@ void lp_build_init(void) { #ifdef DEBUG - gallivm_debug = debug_get_flags_option("GALLIVM_DEBUG", lp_bld_debug_flags, 0 ); + gallivm_debug = debug_get_option_gallivm_debug(); #endif lp_set_target_options(); -- cgit v1.2.3 From e00b17830bb3193d5a24f6a9dfabb996e02c1d74 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Thu, 5 Aug 2010 17:12:54 -0700 Subject: llvmpipe: Only get no rast option once --- src/gallium/drivers/llvmpipe/lp_context.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 3db4f12ebb..28793682ed 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -46,6 +46,10 @@ #include "lp_query.h" #include "lp_setup.h" + +DEBUG_GET_ONCE_BOOL_OPTION(lp_no_rast, "LP_NO_RAST", FALSE); + + static void llvmpipe_destroy( struct pipe_context *pipe ) { struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); @@ -130,7 +134,7 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) /* FIXME: devise alternative to draw_texture_samplers */ - if (debug_get_bool_option( "LP_NO_RAST", FALSE )) + if (debug_get_option_lp_no_rast()) llvmpipe->no_rast = TRUE; llvmpipe->setup = lp_setup_create( &llvmpipe->pipe, -- cgit v1.2.3 From 4d65055b1f2023f95a4f9c2bfab55a67ef72739d Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Thu, 5 Aug 2010 17:21:49 -0700 Subject: util: Add option to not dump cpu caps --- src/gallium/auxiliary/util/u_cpu_detect.c | 39 +++++++++++++++++-------------- 1 file changed, 22 insertions(+), 17 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c 
b/src/gallium/auxiliary/util/u_cpu_detect.c index 879643463f..6f38d22285 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -73,6 +73,9 @@ #endif +DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", TRUE); + + struct util_cpu_caps util_cpu_caps; #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) @@ -500,23 +503,25 @@ util_cpu_detect(void) #endif /* PIPE_ARCH_PPC */ #ifdef DEBUG - debug_printf("util_cpu_caps.arch = %i\n", util_cpu_caps.arch); - debug_printf("util_cpu_caps.nr_cpus = %u\n", util_cpu_caps.nr_cpus); - - debug_printf("util_cpu_caps.x86_cpu_type = %u\n", util_cpu_caps.x86_cpu_type); - debug_printf("util_cpu_caps.cacheline = %u\n", util_cpu_caps.cacheline); - - debug_printf("util_cpu_caps.has_tsc = %u\n", util_cpu_caps.has_tsc); - debug_printf("util_cpu_caps.has_mmx = %u\n", util_cpu_caps.has_mmx); - debug_printf("util_cpu_caps.has_mmx2 = %u\n", util_cpu_caps.has_mmx2); - debug_printf("util_cpu_caps.has_sse = %u\n", util_cpu_caps.has_sse); - debug_printf("util_cpu_caps.has_sse2 = %u\n", util_cpu_caps.has_sse2); - debug_printf("util_cpu_caps.has_sse3 = %u\n", util_cpu_caps.has_sse3); - debug_printf("util_cpu_caps.has_ssse3 = %u\n", util_cpu_caps.has_ssse3); - debug_printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1); - debug_printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow); - debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext); - debug_printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec); + if (debug_get_option_dump_cpu()) { + debug_printf("util_cpu_caps.arch = %i\n", util_cpu_caps.arch); + debug_printf("util_cpu_caps.nr_cpus = %u\n", util_cpu_caps.nr_cpus); + + debug_printf("util_cpu_caps.x86_cpu_type = %u\n", util_cpu_caps.x86_cpu_type); + debug_printf("util_cpu_caps.cacheline = %u\n", util_cpu_caps.cacheline); + + debug_printf("util_cpu_caps.has_tsc = %u\n", util_cpu_caps.has_tsc); + 
debug_printf("util_cpu_caps.has_mmx = %u\n", util_cpu_caps.has_mmx); + debug_printf("util_cpu_caps.has_mmx2 = %u\n", util_cpu_caps.has_mmx2); + debug_printf("util_cpu_caps.has_sse = %u\n", util_cpu_caps.has_sse); + debug_printf("util_cpu_caps.has_sse2 = %u\n", util_cpu_caps.has_sse2); + debug_printf("util_cpu_caps.has_sse3 = %u\n", util_cpu_caps.has_sse3); + debug_printf("util_cpu_caps.has_ssse3 = %u\n", util_cpu_caps.has_ssse3); + debug_printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1); + debug_printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow); + debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext); + debug_printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec); + } #endif util_cpu_detect_initialized = TRUE; -- cgit v1.2.3 From 9f5c1194ff0ff69be5d7641d68169b152bc6cd0a Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Thu, 5 Aug 2010 17:44:05 -0700 Subject: tgsi: Add option to stop the sanity checker from printing --- src/gallium/auxiliary/tgsi/tgsi_sanity.c | 13 +++++++++++++ src/gallium/auxiliary/tgsi/tgsi_sanity.h | 3 ++- 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 97148dbe23..d0550110d8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -33,6 +33,10 @@ #include "tgsi_info.h" #include "tgsi_iterate.h" + +DEBUG_GET_ONCE_BOOL_OPTION(print_sanity, "TGSI_PRINT_SANITY", FALSE); + + typedef struct { uint file : 28; /* max 2 dimensions */ @@ -54,6 +58,8 @@ struct sanity_check_ctx uint errors; uint warnings; uint implied_array_size; + + boolean print; }; static INLINE unsigned @@ -148,6 +154,9 @@ report_error( { va_list args; + if (!ctx->print) + return; + debug_printf( "Error : " ); va_start( args, format ); _debug_vprintf( format, args ); @@ -164,6 +173,9 @@ report_warning( { va_list args; + if 
(!ctx->print) + return; + debug_printf( "Warning: " ); va_start( args, format ); _debug_vprintf( format, args ); @@ -539,6 +551,7 @@ tgsi_sanity_check( ctx.errors = 0; ctx.warnings = 0; ctx.implied_array_size = 0; + ctx.print = debug_get_option_print_sanity(); if (!tgsi_iterate_shader( tokens, &ctx.iter )) return FALSE; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.h b/src/gallium/auxiliary/tgsi/tgsi_sanity.h index 52263ff883..46d8d18419 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.h +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.h @@ -35,7 +35,8 @@ extern "C" { #endif /* Check the given token stream for errors and common mistakes. - * Diagnostic messages are printed out to the debug output. + * Diagnostic messages are printed out to the debug output, and is + * controlled by the debug option TGSI_PRINT_SANITY (default true). * Returns TRUE if there are no errors, even though there could be some warnings. */ boolean -- cgit v1.2.3 From 1a3a04d56b01714e4fa19aa7efcdae4b6644af46 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Thu, 5 Aug 2010 17:52:39 -0700 Subject: tgsi: Fix typo, so we follow what is in the comments --- src/gallium/auxiliary/tgsi/tgsi_sanity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index d0550110d8..9e02040f6c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -34,7 +34,7 @@ #include "tgsi_iterate.h" -DEBUG_GET_ONCE_BOOL_OPTION(print_sanity, "TGSI_PRINT_SANITY", FALSE); +DEBUG_GET_ONCE_BOOL_OPTION(print_sanity, "TGSI_PRINT_SANITY", TRUE); typedef struct { -- cgit v1.2.3 From 4d10ec4f18cfb6c386feb65805713584ff730652 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 6 Aug 2010 01:08:12 +0200 Subject: r300g: do not emit GB_Z_PEQ_CONFIG on non-r500 if DRM < 2.6.0 --- src/gallium/drivers/r300/r300_context.c | 7 +++++-- 
src/gallium/drivers/r300/r300_winsys.h | 1 + src/gallium/winsys/radeon/drm/radeon_drm.c | 4 ++++ src/gallium/winsys/radeon/drm/radeon_r300.c | 2 ++ src/gallium/winsys/radeon/drm/radeon_winsys.h | 6 ++++++ 5 files changed, 18 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 4658ab0ff5..e8b6c4f7af 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -171,6 +171,7 @@ static void r300_setup_atoms(struct r300_context* r300) boolean is_r500 = r300->screen->caps.is_r500; boolean has_tcl = r300->screen->caps.has_tcl; boolean drm_2_3_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); + boolean drm_2_6_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_6_0); boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); boolean has_hiz_ram = r300->screen->caps.hiz_ram > 0; @@ -195,7 +196,7 @@ static void r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(gpu_flush, 9); R300_INIT_ATOM(aa_state, 4); R300_INIT_ATOM(fb_state, 0); - R300_INIT_ATOM(hyperz_state, is_rv350 ? 10 : 8); + R300_INIT_ATOM(hyperz_state, is_r500 || (is_rv350 && drm_2_6_0) ? 10 : 8); /* ZB (unpipelined), SC. */ R300_INIT_ATOM(ztop_state, 2); /* ZB, FG. 
*/ @@ -373,7 +374,9 @@ static void r300_init_states(struct pipe_context *pipe) OUT_CB_REG(R300_ZB_DEPTHCLEARVALUE, 0); OUT_CB_REG(R300_SC_HYPERZ, R300_SC_HYPERZ_ADJ_2); - if (r300->screen->caps.is_rv350) { + if (r300->screen->caps.is_r500 || + (r300->screen->caps.is_rv350 && + r300->rws->get_value(r300->rws, R300_VID_DRM_2_6_0))) { OUT_CB_REG(R300_GB_Z_PEQ_CONFIG, 0); } END_CB; diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index e7a1ede4fb..187780750f 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -49,6 +49,7 @@ enum r300_value_id { R300_VID_Z_PIPES, R300_VID_SQUARE_TILING_SUPPORT, R300_VID_DRM_2_3_0, + R300_VID_DRM_2_6_0, R300_CAN_HYPERZ, }; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm.c b/src/gallium/winsys/radeon/drm/radeon_drm.c index 3604827700..ecaf096dea 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm.c @@ -103,6 +103,10 @@ static void do_ioctls(int fd, struct radeon_libdrm_winsys* winsys) winsys->drm_2_3_0 = version->version_major > 2 || version->version_minor >= 3; + winsys->drm_2_6_0 = version->version_major > 2 || + (version->version_major == 2 && + version->version_minor >= 6); + info.request = RADEON_INFO_DEVICE_ID; retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); if (retval) { diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c index 955ae4c045..5840098642 100644 --- a/src/gallium/winsys/radeon/drm/radeon_r300.c +++ b/src/gallium/winsys/radeon/drm/radeon_r300.c @@ -211,6 +211,8 @@ static uint32_t radeon_get_value(struct r300_winsys_screen *rws, return ws->squaretiling; case R300_VID_DRM_2_3_0: return ws->drm_2_3_0; + case R300_VID_DRM_2_6_0: + return ws->drm_2_6_0; case R300_CAN_HYPERZ: return ws->hyperz; } diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 
52db0d62d2..6f4aa4bce3 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -65,6 +65,12 @@ struct radeon_libdrm_winsys { */ boolean drm_2_3_0; + /* DRM 2.6.0 + * - Hyper-Z + * - GB_Z_PEQ_CONFIG allowed on rv350->r4xx, we should initialize it + */ + boolean drm_2_6_0; + /* hyperz user */ boolean hyperz; -- cgit v1.2.3 From 121a625c1651ddc181e374ebdf16bc5c46f6eaa9 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 6 Aug 2010 11:44:31 +1000 Subject: r600g: add bo wait after map. --- src/gallium/drivers/r600/r600_texture.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 033c71f6ef..1bce911306 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -240,6 +240,8 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, if (radeon_bo_map(rscreen->rw, resource->bo)) { return NULL; } + radeon_bo_wait(rscreen->rw, resource->bo); + map = resource->bo->data; return map + rtransfer->offset + -- cgit v1.2.3 From 2b9036476511edd549d1c2cea6044eef4652a19c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 6 Aug 2010 14:53:38 +1000 Subject: r600g: improve supported format selection. This fixes fbo-readpixels piglit test, and adds support for swapping the formats. Not all formats are correct yet I don't think. 
Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_draw.c | 6 +- src/gallium/drivers/r600/r600_helper.c | 99 ----------------- src/gallium/drivers/r600/r600_screen.c | 76 ++++++------- src/gallium/drivers/r600/r600_state.c | 15 ++- src/gallium/drivers/r600/r600_state_inlines.h | 153 ++++++++++++++++++++++++++ src/gallium/drivers/r600/r600d.h | 5 + 6 files changed, 212 insertions(+), 142 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c index 7130bf2fa8..3a54cee2d9 100644 --- a/src/gallium/drivers/r600/r600_draw.c +++ b/src/gallium/drivers/r600/r600_draw.c @@ -34,7 +34,7 @@ #include "r600_screen.h" #include "r600_context.h" #include "r600_resource.h" -#include "r600d.h" +#include "r600_state_inlines.h" struct r600_draw { struct pipe_context *ctx; @@ -100,9 +100,7 @@ static int r600_draw_common(struct r600_draw *draw) vertex_buffer = &rctx->vertex_buffer[j]; rbuffer = (struct r600_resource*)vertex_buffer->buffer; offset = rctx->vertex_elements->elements[i].src_offset + vertex_buffer->buffer_offset; - r = r600_conv_pipe_format(rctx->vertex_elements->elements[i].src_format, &format); - if (r) - return r; + format = r600_translate_colorformat(rctx->vertex_elements->elements[i].src_format); r = radeon_state_init(&vs_resource, rscreen->rw, R600_VS_RESOURCE_TYPE, R600_VS_RESOURCE + i); if (r) return r; diff --git a/src/gallium/drivers/r600/r600_helper.c b/src/gallium/drivers/r600/r600_helper.c index c672fe7386..5e0e0aab57 100644 --- a/src/gallium/drivers/r600/r600_helper.c +++ b/src/gallium/drivers/r600/r600_helper.c @@ -30,105 +30,6 @@ #include "r600_context.h" #include "r600d.h" -int r600_conv_pipe_format(unsigned pformat, unsigned *format) -{ - switch (pformat) { - case PIPE_FORMAT_R32G32B32_FLOAT: - *format = 0x30; - return 0; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - *format = V_0280A0_COLOR_32_32_32_32_FLOAT; - return 0; - case PIPE_FORMAT_A8R8G8B8_UNORM: - case 
PIPE_FORMAT_X8R8G8B8_UNORM: - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_R8G8B8A8_UNORM: - case PIPE_FORMAT_R8G8B8X8_UNORM: - case PIPE_FORMAT_R8G8B8A8_USCALED: - case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_R8G8B8A8_SSCALED: - *format = V_0280A0_COLOR_8_8_8_8; - return 0; - case PIPE_FORMAT_R32_FLOAT: - *format = V_0280A0_COLOR_32_FLOAT; - return 0; - case PIPE_FORMAT_R32G32_FLOAT: - *format = V_0280A0_COLOR_32_32_FLOAT; - return 0; - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_I8_UNORM: - *format = V_0280A0_COLOR_8; - return 0; - case PIPE_FORMAT_B4G4R4A4_UNORM: - *format = V_0280A0_COLOR_4_4_4_4; - return 0; - case PIPE_FORMAT_B5G6R5_UNORM: - *format = V_0280A0_COLOR_5_6_5; - return 0; - case PIPE_FORMAT_L16_UNORM: - case PIPE_FORMAT_Z16_UNORM: - case PIPE_FORMAT_Z32_UNORM: - case PIPE_FORMAT_Z32_FLOAT: - case PIPE_FORMAT_R64_FLOAT: - case PIPE_FORMAT_R64G64_FLOAT: - case PIPE_FORMAT_R64G64B64_FLOAT: - case PIPE_FORMAT_R64G64B64A64_FLOAT: - case PIPE_FORMAT_R32_UNORM: - case PIPE_FORMAT_R32G32_UNORM: - case PIPE_FORMAT_R32G32B32_UNORM: - case PIPE_FORMAT_R32G32B32A32_UNORM: - case PIPE_FORMAT_R32_USCALED: - case PIPE_FORMAT_R32G32_USCALED: - case PIPE_FORMAT_R32G32B32_USCALED: - case PIPE_FORMAT_R32G32B32A32_USCALED: - case PIPE_FORMAT_R32_SNORM: - case PIPE_FORMAT_R32G32_SNORM: - case PIPE_FORMAT_R32G32B32_SNORM: - case PIPE_FORMAT_R32G32B32A32_SNORM: - case PIPE_FORMAT_R32_SSCALED: - case PIPE_FORMAT_R32G32_SSCALED: - case PIPE_FORMAT_R32G32B32_SSCALED: - case PIPE_FORMAT_R32G32B32A32_SSCALED: - case PIPE_FORMAT_R16_UNORM: - case PIPE_FORMAT_R16G16_UNORM: - case PIPE_FORMAT_R16G16B16_UNORM: - case PIPE_FORMAT_R16G16B16A16_UNORM: - case PIPE_FORMAT_R16_USCALED: - case PIPE_FORMAT_R16G16_USCALED: - case PIPE_FORMAT_R16G16B16_USCALED: - case PIPE_FORMAT_R16G16B16A16_USCALED: - case PIPE_FORMAT_R16_SNORM: - case PIPE_FORMAT_R16G16_SNORM: - case PIPE_FORMAT_R16G16B16_SNORM: - case 
PIPE_FORMAT_R16G16B16A16_SNORM: - case PIPE_FORMAT_R16_SSCALED: - case PIPE_FORMAT_R16G16_SSCALED: - case PIPE_FORMAT_R16G16B16_SSCALED: - case PIPE_FORMAT_R16G16B16A16_SSCALED: - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R8G8_UNORM: - case PIPE_FORMAT_R8G8B8_UNORM: - case PIPE_FORMAT_R8_USCALED: - case PIPE_FORMAT_R8G8_USCALED: - case PIPE_FORMAT_R8G8B8_USCALED: - case PIPE_FORMAT_R8_SNORM: - case PIPE_FORMAT_R8G8_SNORM: - case PIPE_FORMAT_R8G8B8_SNORM: - case PIPE_FORMAT_R8_SSCALED: - case PIPE_FORMAT_R8G8_SSCALED: - case PIPE_FORMAT_R8G8B8_SSCALED: - case PIPE_FORMAT_R32_FIXED: - case PIPE_FORMAT_R32G32_FIXED: - case PIPE_FORMAT_R32G32B32_FIXED: - case PIPE_FORMAT_R32G32B32A32_FIXED: - default: - R600_ERR("unsupported %d\n", pformat); - return -EINVAL; - } -} - int r600_conv_pipe_prim(unsigned pprim, unsigned *prim) { switch (pprim) { diff --git a/src/gallium/drivers/r600/r600_screen.c b/src/gallium/drivers/r600/r600_screen.c index 68615ca162..4b87327a7c 100644 --- a/src/gallium/drivers/r600/r600_screen.c +++ b/src/gallium/drivers/r600/r600_screen.c @@ -32,6 +32,7 @@ #include "r600_context.h" #include "r600_public.h" #include "r600_resource.h" +#include "r600_state_inlines.h" static const char* r600_get_vendor(struct pipe_screen* pscreen) { @@ -133,50 +134,51 @@ static boolean r600_is_format_supported(struct pipe_screen* screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned bindings, + unsigned usage, unsigned geom_flags) { + unsigned retval = 0; if (target >= PIPE_MAX_TEXTURE_TYPES) { R600_ERR("r600: unsupported texture type %d\n", target); return FALSE; } - switch (format) { - case PIPE_FORMAT_B4G4R4A4_UNORM: - case PIPE_FORMAT_B5G6R5_UNORM: - case PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_A8R8G8B8_SRGB: - case PIPE_FORMAT_R8G8B8A8_SRGB: - case PIPE_FORMAT_DXT1_RGB: - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT3_RGBA: - case 
PIPE_FORMAT_DXT5_RGBA: - case PIPE_FORMAT_UYVY: - case PIPE_FORMAT_L8_SRGB: - case PIPE_FORMAT_L8A8_SRGB: - case PIPE_FORMAT_L8A8_UNORM: - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_X8R8G8B8_UNORM: - case PIPE_FORMAT_R8G8B8A8_UNORM: - case PIPE_FORMAT_R8G8B8X8_UNORM: - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_A8B8G8R8_SRGB: - case PIPE_FORMAT_B8G8R8A8_SRGB: - case PIPE_FORMAT_I8_UNORM: - case PIPE_FORMAT_Z16_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - case PIPE_FORMAT_Z32_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - case PIPE_FORMAT_Z24X8_UNORM: - return TRUE; - default: - /* Unknown format... */ - break; + + /* Multisample */ + if (sample_count > 1) + return FALSE; + + if ((usage & PIPE_BIND_SAMPLER_VIEW) && + r600_is_sampler_format_supported(format)) { + retval |= PIPE_BIND_SAMPLER_VIEW; + } + + if ((usage & (PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED)) && + r600_is_colorbuffer_format_supported(format)) { + retval |= usage & + (PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED); } - return FALSE; + + if ((usage & PIPE_BIND_DEPTH_STENCIL) && + r600_is_zs_format_supported(format)) { + retval |= PIPE_BIND_DEPTH_STENCIL; + } + + if ((usage & PIPE_BIND_VERTEX_BUFFER) && + r600_is_vertex_format_supported(format)) + retval |= PIPE_BIND_VERTEX_BUFFER; + + if (usage & PIPE_BIND_TRANSFER_READ) + retval |= PIPE_BIND_TRANSFER_READ; + if (usage & PIPE_BIND_TRANSFER_WRITE) + retval |= PIPE_BIND_TRANSFER_WRITE; + + return retval == usage; } static void r600_destroy_screen(struct pipe_screen* pscreen) diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 926a19cc6f..b8d50452e6 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -668,6 +668,8 @@ static int r600_cb0(struct r600_context *rctx, struct 
radeon_state *rstate) const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer; unsigned level = state->cbufs[0]->level; unsigned pitch, slice; + unsigned color_info; + unsigned format, swap; int r; r = radeon_state_init(rstate, rscreen->rw, R600_CB0_TYPE, R600_CB0); @@ -685,8 +687,16 @@ static int r600_cb0(struct r600_context *rctx, struct radeon_state *rstate) rstate->nbo = 3; pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[0]->height / 64 - 1; + + format = r600_translate_colorformat(rtex->resource.base.b.format); + swap = r600_translate_colorswap(rtex->resource.base.b.format); + color_info = S_0280A0_FORMAT(format) | + S_0280A0_COMP_SWAP(swap) | + S_0280A0_BLEND_CLAMP(1) | + S_0280A0_SOURCE_FORMAT(1); + rstate->states[R600_CB0__CB_COLOR0_BASE] = 0x00000000; - rstate->states[R600_CB0__CB_COLOR0_INFO] = 0x08110068; + rstate->states[R600_CB0__CB_COLOR0_INFO] = color_info; rstate->states[R600_CB0__CB_COLOR0_SIZE] = S_028060_PITCH_TILE_MAX(pitch) | S_028060_SLICE_TILE_MAX(slice); rstate->states[R600_CB0__CB_COLOR0_VIEW] = 0x00000000; @@ -1103,7 +1113,8 @@ static int r600_resource(struct r600_context *rctx, struct radeon_state *rstate, r = radeon_state_init(rstate, rscreen->rw, R600_PS_RESOURCE_TYPE, id); if (r) return r; - if (r600_conv_pipe_format(view->texture->format, &format)) + format = r600_translate_colorformat(view->texture->format); + if (format == ~0) return -EINVAL; desc = util_format_description(view->texture->format); if (desc == NULL) { diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 321e75d7a1..b45089dcc1 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -23,6 +23,9 @@ #ifndef R600_STATE_INLINES_H #define R600_STATE_INLINES_H +#include "util/u_format.h" +#include "r600d.h" + static INLINE uint32_t r600_translate_blend_function(int blend_func) 
{ switch (blend_func) { @@ -126,4 +129,154 @@ static INLINE uint32_t r600_translate_ds_func(int func) return func; } +static uint32_t r600_translate_colorswap(enum pipe_format format) +{ + switch (format) { + /* 8-bit buffers. */ + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8_SNORM: + return SWAP_STD; + + /* 16-bit buffers. */ + case PIPE_FORMAT_B5G6R5_UNORM: + return SWAP_STD_REV; + + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B5G5R5X1_UNORM: + return SWAP_ALT; + + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B4G4R4X4_UNORM: + return SWAP_ALT; + /* 32-bit buffers. */ + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + return SWAP_ALT; + + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + return SWAP_ALT_REV; + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + return SWAP_STD; + + case PIPE_FORMAT_A8B8G8R8_UNORM: + case PIPE_FORMAT_X8B8G8R8_UNORM: +// case PIPE_FORMAT_R8SG8SB8UX8U_NORM: + return SWAP_STD_REV; + + case PIPE_FORMAT_R10G10B10A2_UNORM: + case PIPE_FORMAT_R10G10B10X2_SNORM: + case PIPE_FORMAT_B10G10R10A2_UNORM: + case PIPE_FORMAT_R10SG10SB10SA2U_NORM: + return SWAP_STD_REV; + + /* 64-bit buffers. */ + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: +// return V_0280A0_COLOR_16_16_16_16; + case PIPE_FORMAT_R16G16B16A16_FLOAT: +// return V_0280A0_COLOR_16_16_16_16_FLOAT; + + /* 128-bit buffers. */ + case PIPE_FORMAT_R32G32B32A32_FLOAT: +// return V_0280A0_COLOR_32_32_32_32_FLOAT; + return 0; + default: + R600_ERR("unsupported colorswap format %d\n", format); + return ~0; + } + return ~0; + +} + +static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) +{ + switch (format) { + /* 8-bit buffers. 
*/ + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8_SNORM: + return V_0280A0_COLOR_8; + + /* 16-bit buffers. */ + case PIPE_FORMAT_B5G6R5_UNORM: + return V_0280A0_COLOR_5_6_5; + + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B5G5R5X1_UNORM: + return V_0280A0_COLOR_1_5_5_5; + + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B4G4R4X4_UNORM: + return V_0280A0_COLOR_4_4_4_4; + + /* 32-bit buffers. */ + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_A8B8G8R8_UNORM: + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_X8B8G8R8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + case PIPE_FORMAT_R8SG8SB8UX8U_NORM: + return V_0280A0_COLOR_8_8_8_8; + + case PIPE_FORMAT_R10G10B10A2_UNORM: + case PIPE_FORMAT_R10G10B10X2_SNORM: + case PIPE_FORMAT_B10G10R10A2_UNORM: + case PIPE_FORMAT_R10SG10SB10SA2U_NORM: + return V_0280A0_COLOR_10_10_10_2; + + /* 64-bit buffers. */ + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + return V_0280A0_COLOR_16_16_16_16; + case PIPE_FORMAT_R16G16B16A16_FLOAT: + return V_0280A0_COLOR_16_16_16_16_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: + return V_0280A0_COLOR_32_32_FLOAT; + + /* 128-bit buffers. */ + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return V_0280A0_COLOR_32_32_32_32_FLOAT; + + /* YUV buffers. */ + case PIPE_FORMAT_UYVY: +// return R300_COLOR_FORMAT_YVYU; + case PIPE_FORMAT_YUYV: +// return R300_COLOR_FORMAT_VYUY; + default: + return ~0; /* Unsupported. 
*/ + } +} + +static INLINE boolean r600_is_sampler_format_supported(enum pipe_format format) +{ + return r600_translate_colorformat(format) != ~0; +} + +static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format) +{ + return r600_translate_colorformat(format) != ~0 && + r600_translate_colorswap(format) != ~0; +} + +static INLINE boolean r600_is_zs_format_supported(enum pipe_format format) +{ + return TRUE; +} + +static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format) +{ + return r600_translate_colorformat(format) != ~0; +} + #endif diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 8205bdeadc..2d0ede20fa 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -1169,4 +1169,9 @@ #define G_0286D4_PNT_SPRITE_TOP_1(x) (((x) >> 14) & 0x1) #define C_0286D4_PNT_SPRITE_TOP_1 0xFFFFBFFF +/* temporary swap */ +#define SWAP_STD 0 +#define SWAP_ALT 1 +#define SWAP_STD_REV 2 +#define SWAP_ALT_REV 3 #endif -- cgit v1.2.3 From b8de7788a4b20c702b06402e2e6eed60467e2522 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 6 Aug 2010 14:54:24 +1000 Subject: r600g: fix targetmask to work correctly. At least this seems to fix the glean maskedClear test. 
--- src/gallium/drivers/r600/r600_state.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index b8d50452e6..e43e4afe55 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1179,12 +1179,11 @@ static int r600_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate) } else color_control |= (0xcc << 16); - target_mask |= (pbs->rt[0].colormask); for (i = 0; i < 8; i++) { if (pbs->rt[i].blend_enable) { color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); - target_mask |= (pbs->rt[0].colormask << (4 * i)); } + target_mask |= (pbs->rt[i].colormask << (4 * i)); } r = radeon_state_init(rstate, rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); if (r) -- cgit v1.2.3 From fc47cb9d710c046d34e8238337e009d7b76a3207 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 6 Aug 2010 15:06:25 +1000 Subject: r600g: fixup z format translations. This enables GL_EXT_packed_depth_stencil. 
fbo-d24s8 passes --- src/gallium/drivers/r600/r600_state.c | 20 +------------------- src/gallium/drivers/r600/r600_state_inlines.h | 24 +++++++++++++++++++++++- 2 files changed, 24 insertions(+), 20 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index e43e4afe55..82145617ca 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -706,22 +706,6 @@ static int r600_cb0(struct r600_context *rctx, struct radeon_state *rstate) return radeon_state_pm4(rstate); } -static int r600_db_format(unsigned pformat, unsigned *format) -{ - switch (pformat) { - case PIPE_FORMAT_Z24X8_UNORM: - *format = V_028010_DEPTH_X8_24; - return 0; - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - *format = V_028010_DEPTH_8_24; - return 0; - default: - *format = V_028010_DEPTH_INVALID; - R600_ERR("unsupported %d\n", pformat); - return -EINVAL; - } -} - static int r600_db(struct r600_context *rctx, struct radeon_state *rstate) { struct r600_screen *rscreen = rctx->screen; @@ -746,9 +730,7 @@ static int r600_db(struct r600_context *rctx, struct radeon_state *rstate) level = state->zsbuf->level; pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; - if (r600_db_format(state->zsbuf->texture->format, &format)) { - return -EINVAL; - } + format = r600_translate_dbformat(state->zsbuf->texture->format); rstate->states[R600_DB__DB_DEPTH_BASE] = 0x00000000; rstate->states[R600_DB__DB_DEPTH_INFO] = 0x00010000 | S_028010_FORMAT(format); diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index b45089dcc1..4a955da1c0 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -129,6 +129,20 @@ static INLINE uint32_t r600_translate_ds_func(int func) return func; } +static uint32_t r600_translate_dbformat(enum 
pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + return V_028010_DEPTH_16; + case PIPE_FORMAT_Z24X8_UNORM: + return V_028010_DEPTH_X8_24; + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + return V_028010_DEPTH_8_24; + default: + return ~0; + } +} + static uint32_t r600_translate_colorswap(enum pipe_format format) { switch (format) { @@ -168,6 +182,10 @@ static uint32_t r600_translate_colorswap(enum pipe_format format) // case PIPE_FORMAT_R8SG8SB8UX8U_NORM: return SWAP_STD_REV; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + return SWAP_STD; + case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_R10G10B10X2_SNORM: case PIPE_FORMAT_B10G10R10A2_UNORM: @@ -234,6 +252,10 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R10SG10SB10SA2U_NORM: return V_0280A0_COLOR_10_10_10_2; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + return V_0280A0_COLOR_24_8; + /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: @@ -271,7 +293,7 @@ static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format form static INLINE boolean r600_is_zs_format_supported(enum pipe_format format) { - return TRUE; + return r600_translate_dbformat(format) != ~0; } static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format) -- cgit v1.2.3 From 0a2a6c75bffc56d8dfde9b8a46c40222825630ea Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 6 Aug 2010 15:21:44 +1000 Subject: r600g: add SRGB support. This enables GL2.1 and passes glean's texture_srgb test. 
--- src/gallium/drivers/r600/r600_state.c | 17 +++++++++++++---- src/gallium/drivers/r600/r600_state_inlines.h | 8 ++++++++ src/gallium/drivers/r600/r600d.h | 2 ++ 3 files changed, 23 insertions(+), 4 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 82145617ca..1a8ec48936 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -669,8 +669,9 @@ static int r600_cb0(struct r600_context *rctx, struct radeon_state *rstate) unsigned level = state->cbufs[0]->level; unsigned pitch, slice; unsigned color_info; - unsigned format, swap; + unsigned format, swap, ntype; int r; + const struct util_format_description *desc; r = radeon_state_init(rstate, rscreen->rw, R600_CB0_TYPE, R600_CB0); if (r) @@ -688,12 +689,19 @@ static int r600_cb0(struct r600_context *rctx, struct radeon_state *rstate) pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[0]->height / 64 - 1; + ntype = 0; + desc = util_format_description(rtex->resource.base.b.format); + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) + ntype = NUM_FORMAT_SRGB; + format = r600_translate_colorformat(rtex->resource.base.b.format); swap = r600_translate_colorswap(rtex->resource.base.b.format); + color_info = S_0280A0_FORMAT(format) | - S_0280A0_COMP_SWAP(swap) | - S_0280A0_BLEND_CLAMP(1) | - S_0280A0_SOURCE_FORMAT(1); + S_0280A0_COMP_SWAP(swap) | + S_0280A0_BLEND_CLAMP(1) | + S_0280A0_SOURCE_FORMAT(1) | + S_0280A0_NUMBER_TYPE(ntype); rstate->states[R600_CB0__CB_COLOR0_BASE] = 0x00000000; rstate->states[R600_CB0__CB_COLOR0_INFO] = color_info; @@ -1136,6 +1144,7 @@ static int r600_resource(struct r600_context *rctx, struct radeon_state *rstate, S_038010_DST_SEL_Y(r600_tex_swizzle(view->swizzle_g)) | S_038010_DST_SEL_Z(r600_tex_swizzle(view->swizzle_r)) | S_038010_DST_SEL_W(r600_tex_swizzle(view->swizzle_a)) | + 
S_038010_FORCE_DEGAMMA(desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ? 1 : 0) | S_038010_BASE_LEVEL(view->first_level); rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD5] = S_038014_LAST_LEVEL(view->last_level) | diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 4a955da1c0..fdc29386ae 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -166,6 +166,12 @@ static uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_B4G4R4X4_UNORM: return SWAP_ALT; /* 32-bit buffers. */ + + case PIPE_FORMAT_A8B8G8R8_SRGB: + return SWAP_STD_REV; + case PIPE_FORMAT_B8G8R8A8_SRGB: + return SWAP_ALT; + case PIPE_FORMAT_B8G8R8A8_UNORM: case PIPE_FORMAT_B8G8R8X8_UNORM: return SWAP_ALT; @@ -244,6 +250,8 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_X8B8G8R8_UNORM: case PIPE_FORMAT_R8G8B8X8_UNORM: case PIPE_FORMAT_R8SG8SB8UX8U_NORM: + case PIPE_FORMAT_A8B8G8R8_SRGB: + case PIPE_FORMAT_B8G8R8A8_SRGB: return V_0280A0_COLOR_8_8_8_8; case PIPE_FORMAT_R10G10B10A2_UNORM: diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 2d0ede20fa..f9cad93185 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -1174,4 +1174,6 @@ #define SWAP_ALT 1 #define SWAP_STD_REV 2 #define SWAP_ALT_REV 3 + +#define NUM_FORMAT_SRGB 6 #endif -- cgit v1.2.3 From 524b2626c2d018f330ae7423c858ef73ea0424b5 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Sun, 1 Aug 2010 21:29:36 +0800 Subject: draw: Add draw_decompose_tmp.h. Including draw_decompose_tmp.h defines a primitive decomposer. It is intended to replace the existing vcache/so/gs/pipe decomposers. This is based on draw_pt_vcache_tmp.h. 
--- src/gallium/auxiliary/draw/draw_decompose_tmp.h | 425 ++++++++++++++++++++++++ 1 file changed, 425 insertions(+) create mode 100644 src/gallium/auxiliary/draw/draw_decompose_tmp.h (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_decompose_tmp.h b/src/gallium/auxiliary/draw/draw_decompose_tmp.h new file mode 100644 index 0000000000..cb25202323 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_decompose_tmp.h @@ -0,0 +1,425 @@ +/* + * Mesa 3-D graphics library + * Version: 7.9 + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright (C) 2010 LunarG Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Keith Whitwell + * Chia-I Wu + */ + +/* these macros are optional */ +#ifndef LOCAL_VARS +#define LOCAL_VARS do {} while (0) +#endif +#ifndef FUNC_ENTER +#define FUNC_ENTER do {} while (0) +#endif +#ifndef FUNC_EXIT +#define FUNC_EXIT do {} while (0) +#endif +#ifndef LINE_ADJ +#define LINE_ADJ(flags, a0, i0, i1, a1) LINE(flags, i0, i1) +#endif +#ifndef TRIANGLE_ADJ +#define TRIANGLE_ADJ(flags, i0, a0, i1, a1, i2, a2) TRIANGLE(flags, i0, i1, i2) +#endif + +static void +FUNC(FUNC_VARS) +{ + unsigned idx[6], i; + ushort flags; + LOCAL_VARS; + + FUNC_ENTER; + + /* prim, count, and last_vertex_last should have been defined */ + if (0) { + debug_printf("%s: prim 0x%x, count %d, last_vertex_last %d\n", + __FUNCTION__, prim, count, last_vertex_last); + } + + switch (prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < count; i++) { + idx[0] = GET_ELT(i); + POINT(idx[0]); + } + break; + + case PIPE_PRIM_LINES: + flags = DRAW_PIPE_RESET_STIPPLE; + for (i = 0; i + 1 < count; i += 2) { + idx[0] = GET_ELT(i); + idx[1] = GET_ELT(i + 1); + LINE(flags, idx[0], idx[1]); + } + break; + + case PIPE_PRIM_LINE_LOOP: + case PIPE_PRIM_LINE_STRIP: + if (count >= 2) { + flags = DRAW_PIPE_RESET_STIPPLE; + idx[1] = GET_ELT(0); + idx[2] = idx[1]; + + for (i = 1; i < count; i++, flags = 0) { + idx[0] = idx[1]; + idx[1] = GET_ELT(i); + LINE(flags, idx[0], idx[1]); + } + /* close the loop */ + if (prim == PIPE_PRIM_LINE_LOOP) + LINE(flags, idx[1], idx[2]); + } + break; + + case PIPE_PRIM_TRIANGLES: + flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; + for (i = 0; i + 2 < count; i += 3) { + idx[0] = GET_ELT(i); + idx[1] = GET_ELT(i + 1); + idx[2] = GET_ELT(i + 2); + TRIANGLE(flags, idx[0], idx[1], idx[2]); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + if (count >= 3) { + flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; + idx[1] = GET_ELT(0); + idx[2] = GET_ELT(1); + + if (last_vertex_last) { + for (i = 0; i + 2 < count; i++) { + idx[0] = idx[1]; + idx[1] 
= idx[2]; + idx[2] = GET_ELT(i + 2); + /* always emit idx[2] last */ + if (i & 1) + TRIANGLE(flags, idx[1], idx[0], idx[2]); + else + TRIANGLE(flags, idx[0], idx[1], idx[2]); + } + } + else { + for (i = 0; i + 2 < count; i++) { + idx[0] = idx[1]; + idx[1] = idx[2]; + idx[2] = GET_ELT(i + 2); + /* always emit idx[0] first */ + if (i & 1) + TRIANGLE(flags, idx[0], idx[2], idx[1]); + else + TRIANGLE(flags, idx[0], idx[1], idx[2]); + } + } + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + if (count >= 3) { + flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; + idx[0] = GET_ELT(0); + idx[2] = GET_ELT(1); + + /* idx[0] is neither the first nor the last vertex */ + if (last_vertex_last) { + for (i = 0; i + 2 < count; i++) { + idx[1] = idx[2]; + idx[2] = GET_ELT(i + 2); + /* always emit idx[2] last */ + TRIANGLE(flags, idx[0], idx[1], idx[2]); + } + } + else { + for (i = 0; i + 2 < count; i++) { + idx[1] = idx[2]; + idx[2] = GET_ELT(i + 2); + /* always emit idx[1] first */ + TRIANGLE(flags, idx[1], idx[2], idx[0]); + } + } + } + break; + + case PIPE_PRIM_QUADS: + if (last_vertex_last) { + for (i = 0; i + 3 < count; i += 4) { + idx[0] = GET_ELT(i); + idx[1] = GET_ELT(i + 1); + idx[2] = GET_ELT(i + 2); + idx[3] = GET_ELT(i + 3); + + flags = DRAW_PIPE_RESET_STIPPLE | + DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_2; + /* always emit idx[3] last */ + TRIANGLE(flags, idx[0], idx[1], idx[3]); + + flags = DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_1; + TRIANGLE(flags, idx[1], idx[2], idx[3]); + } + } + else { + for (i = 0; i + 3 < count; i += 4) { + idx[0] = GET_ELT(i); + idx[1] = GET_ELT(i + 1); + idx[2] = GET_ELT(i + 2); + idx[3] = GET_ELT(i + 3); + + flags = DRAW_PIPE_RESET_STIPPLE | + DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_1; + /* XXX should always emit idx[0] first */ + /* always emit idx[3] first */ + TRIANGLE(flags, idx[3], idx[0], idx[1]); + + flags = DRAW_PIPE_EDGE_FLAG_1 | + DRAW_PIPE_EDGE_FLAG_2; + TRIANGLE(flags, idx[3], idx[1], idx[2]); + } + } + 
break; + + case PIPE_PRIM_QUAD_STRIP: + if (count >= 4) { + idx[2] = GET_ELT(0); + idx[3] = GET_ELT(1); + + if (last_vertex_last) { + for (i = 0; i + 3 < count; i += 2) { + idx[0] = idx[2]; + idx[1] = idx[3]; + idx[2] = GET_ELT(i + 2); + idx[3] = GET_ELT(i + 3); + + /* always emit idx[3] last */ + flags = DRAW_PIPE_RESET_STIPPLE | + DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_2; + TRIANGLE(flags, idx[2], idx[0], idx[3]); + + flags = DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_1; + TRIANGLE(flags, idx[0], idx[1], idx[3]); + } + } + else { + for (i = 0; i + 3 < count; i += 2) { + idx[0] = idx[2]; + idx[1] = idx[3]; + idx[2] = GET_ELT(i + 2); + idx[3] = GET_ELT(i + 3); + + flags = DRAW_PIPE_RESET_STIPPLE | + DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_1; + /* XXX should always emit idx[0] first */ + /* always emit idx[3] first */ + TRIANGLE(flags, idx[3], idx[2], idx[0]); + + flags = DRAW_PIPE_EDGE_FLAG_1 | + DRAW_PIPE_EDGE_FLAG_2; + TRIANGLE(flags, idx[3], idx[0], idx[1]); + } + } + } + break; + + case PIPE_PRIM_POLYGON: + if (count >= 3) { + ushort edge_next, edge_finish; + + if (last_vertex_last) { + flags = (DRAW_PIPE_RESET_STIPPLE | + DRAW_PIPE_EDGE_FLAG_2 | + DRAW_PIPE_EDGE_FLAG_0); + edge_next = DRAW_PIPE_EDGE_FLAG_0; + edge_finish = DRAW_PIPE_EDGE_FLAG_1; + } + else { + flags = (DRAW_PIPE_RESET_STIPPLE | + DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_1); + edge_next = DRAW_PIPE_EDGE_FLAG_1; + edge_finish = DRAW_PIPE_EDGE_FLAG_2; + } + + idx[0] = GET_ELT(0); + idx[2] = GET_ELT(1); + + for (i = 0; i + 2 < count; i++, flags = edge_next) { + idx[1] = idx[2]; + idx[2] = GET_ELT(i + 2); + + if (i + 3 == count) + flags |= edge_finish; + + /* idx[0] is both the first and the last vertex */ + if (last_vertex_last) + TRIANGLE(flags, idx[1], idx[2], idx[0]); + else + TRIANGLE(flags, idx[0], idx[1], idx[2]); + } + } + break; + + case PIPE_PRIM_LINES_ADJACENCY: + flags = DRAW_PIPE_RESET_STIPPLE; + for (i = 0; i + 3 < count; i += 4) { + idx[0] = GET_ELT(i); + idx[1] 
= GET_ELT(i + 1); + idx[2] = GET_ELT(i + 2); + idx[3] = GET_ELT(i + 3); + LINE_ADJ(flags, idx[0], idx[1], idx[2], idx[3]); + } + break; + + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + if (count >= 4) { + flags = DRAW_PIPE_RESET_STIPPLE; + idx[1] = GET_ELT(0); + idx[2] = GET_ELT(1); + idx[3] = GET_ELT(2); + + for (i = 1; i + 2 < count; i++, flags = 0) { + idx[0] = idx[1]; + idx[1] = idx[2]; + idx[2] = idx[3]; + idx[3] = GET_ELT(i + 2); + LINE_ADJ(flags, idx[0], idx[1], idx[2], idx[3]); + } + } + break; + + case PIPE_PRIM_TRIANGLES_ADJACENCY: + flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; + for (i = 0; i + 5 < count; i += 6) { + idx[0] = GET_ELT(i); + idx[1] = GET_ELT(i + 1); + idx[2] = GET_ELT(i + 2); + idx[3] = GET_ELT(i + 3); + idx[4] = GET_ELT(i + 4); + idx[5] = GET_ELT(i + 5); + TRIANGLE_ADJ(flags, idx[0], idx[1], idx[2], idx[3], idx[4], idx[5]); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + if (count >= 6) { + flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; + idx[0] = GET_ELT(1); + idx[2] = GET_ELT(0); + idx[4] = GET_ELT(2); + idx[3] = GET_ELT(4); + + /* + * The vertices of the i-th triangle are stored in + * idx[0,2,4] = { 2*i, 2*i+2, 2*i+4 }; + * + * The adjacent vertices are stored in + * idx[1,3,5] = { 2*i-2, 2*i+6, 2*i+3 }. + * + * However, there are two exceptions: + * + * For the first triangle, idx[1] = 1; + * For the last triangle, idx[3] = 2*i+5. + */ + if (last_vertex_last) { + for (i = 0; i + 5 < count; i += 2) { + idx[1] = idx[0]; + + idx[0] = idx[2]; + idx[2] = idx[4]; + idx[4] = idx[3]; + + idx[3] = GET_ELT(i + ((i + 7 < count) ? 
6 : 5)); + idx[5] = GET_ELT(i + 3); + + /* + * alternate the first two vertices (idx[0] and idx[2]) and the + * corresponding adjacent vertices (idx[3] and idx[5]) to have + * the correct orientation + */ + if (i & 2) { + TRIANGLE_ADJ(flags, + idx[2], idx[1], idx[0], idx[5], idx[4], idx[3]); + } + else { + TRIANGLE_ADJ(flags, + idx[0], idx[1], idx[2], idx[3], idx[4], idx[5]); + } + } + } + else { + for (i = 0; i + 5 < count; i += 2) { + idx[1] = idx[0]; + + idx[0] = idx[2]; + idx[2] = idx[4]; + idx[4] = idx[3]; + + idx[3] = GET_ELT(i + ((i + 7 < count) ? 6 : 5)); + idx[5] = GET_ELT(i + 3); + + /* + * alternate the last two vertices (idx[2] and idx[4]) and the + * corresponding adjacent vertices (idx[1] and idx[5]) to have + * the correct orientation + */ + if (i & 2) { + TRIANGLE_ADJ(flags, + idx[0], idx[5], idx[4], idx[3], idx[2], idx[1]); + } + else { + TRIANGLE_ADJ(flags, + idx[0], idx[1], idx[2], idx[3], idx[4], idx[5]); + } + } + } + } + break; + + default: + assert(0); + break; + } + + FUNC_EXIT; +} + +#undef LOCAL_VARS +#undef FUNC_ENTER +#undef FUNC_EXIT +#undef LINE_ADJ +#undef TRIANGLE_ADJ + +#undef FUNC +#undef FUNC_VARS +#undef GET_ELT +#undef POINT +#undef LINE +#undef TRIANGLE -- cgit v1.2.3 From 8a41b18b7d731b5db2df5523dbe26143ae171c3a Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 2 Aug 2010 01:21:08 +0800 Subject: draw: Include draw_decompose_tmp.h in draw_pt_vcache_tmp.h. Use draw_decompose_tmp.h to replace vcache primitive decomposer. As the new decomposer supports primitives with adjacency, vcache_triangle_adj and vcache_line_adj (and their variants that have flags) are added. 
--- src/gallium/auxiliary/draw/draw_pt_vcache.c | 135 ++++++++------- src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h | 209 ++---------------------- 2 files changed, 92 insertions(+), 252 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index d2fa1c6d4e..a848b54f7d 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -95,7 +95,7 @@ static INLINE void vcache_check_flush( struct vcache_frontend *vcache ) { if (vcache->draw_count + 6 >= DRAW_MAX || - vcache->fetch_count + 4 >= FETCH_MAX) { + vcache->fetch_count + 6 >= FETCH_MAX) { vcache_flush( vcache ); } } @@ -180,59 +180,61 @@ vcache_point( struct vcache_frontend *vcache, } -static INLINE void -vcache_quad( struct vcache_frontend *vcache, - unsigned i0, - unsigned i1, - unsigned i2, - unsigned i3 ) +static INLINE void +vcache_line_adj_flags( struct vcache_frontend *vcache, + unsigned flags, + unsigned a0, unsigned i0, unsigned i1, unsigned a1 ) { - if (vcache->draw->rasterizer->flatshade_first) { - /* pass last quad vertex as first triangle vertex */ - vcache_triangle( vcache, i3, i0, i1 ); - vcache_triangle( vcache, i3, i1, i2 ); - } - else { - /* pass last quad vertex as last triangle vertex */ - vcache_triangle( vcache, i0, i1, i3 ); - vcache_triangle( vcache, i1, i2, i3 ); - } + vcache_elt(vcache, a0, 0); + vcache_elt(vcache, i0, flags); + vcache_elt(vcache, i1, 0); + vcache_elt(vcache, a1, 0); + vcache_check_flush(vcache); } -static INLINE void -vcache_ef_quad( struct vcache_frontend *vcache, - unsigned i0, - unsigned i1, - unsigned i2, - unsigned i3 ) +static INLINE void +vcache_line_adj( struct vcache_frontend *vcache, + unsigned a0, unsigned i0, unsigned i1, unsigned a1 ) { - if (vcache->draw->rasterizer->flatshade_first) { - /* pass last quad vertex as first triangle vertex */ - vcache_triangle_flags( vcache, - ( DRAW_PIPE_RESET_STIPPLE | - 
DRAW_PIPE_EDGE_FLAG_0 | - DRAW_PIPE_EDGE_FLAG_1 ), - i3, i0, i1 ); - - vcache_triangle_flags( vcache, - ( DRAW_PIPE_EDGE_FLAG_1 | - DRAW_PIPE_EDGE_FLAG_2 ), - i3, i1, i2 ); - } - else { - /* pass last quad vertex as last triangle vertex */ - vcache_triangle_flags( vcache, - ( DRAW_PIPE_RESET_STIPPLE | - DRAW_PIPE_EDGE_FLAG_0 | - DRAW_PIPE_EDGE_FLAG_2 ), - i0, i1, i3 ); - - vcache_triangle_flags( vcache, - ( DRAW_PIPE_EDGE_FLAG_0 | - DRAW_PIPE_EDGE_FLAG_1 ), - i1, i2, i3 ); - } + vcache_elt(vcache, a0, 0); + vcache_elt(vcache, i0, 0); + vcache_elt(vcache, i1, 0); + vcache_elt(vcache, a1, 0); + vcache_check_flush(vcache); +} + + +static INLINE void +vcache_triangle_adj_flags( struct vcache_frontend *vcache, + unsigned flags, + unsigned i0, unsigned a0, + unsigned i1, unsigned a1, + unsigned i2, unsigned a2 ) +{ + vcache_elt(vcache, i0, flags); + vcache_elt(vcache, a0, 0); + vcache_elt(vcache, i1, 0); + vcache_elt(vcache, a1, 0); + vcache_elt(vcache, i2, 0); + vcache_elt(vcache, a2, 0); + vcache_check_flush(vcache); +} + + +static INLINE void +vcache_triangle_adj( struct vcache_frontend *vcache, + unsigned i0, unsigned a0, + unsigned i1, unsigned a1, + unsigned i2, unsigned a2 ) +{ + vcache_elt(vcache, i0, 0); + vcache_elt(vcache, a0, 0); + vcache_elt(vcache, i1, 0); + vcache_elt(vcache, a1, 0); + vcache_elt(vcache, i2, 0); + vcache_elt(vcache, a2, 0); + vcache_check_flush(vcache); } @@ -240,17 +242,23 @@ vcache_ef_quad( struct vcache_frontend *vcache, * this. The two paths aren't too different though - it may be * possible to reunify them. 
*/ -#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle_flags(vc,flags,i0,i1,i2) -#define QUAD(vc,i0,i1,i2,i3) vcache_ef_quad(vc,i0,i1,i2,i3) -#define LINE(vc,flags,i0,i1) vcache_line_flags(vc,flags,i0,i1) -#define POINT(vc,i0) vcache_point(vc,i0) +#define TRIANGLE(flags,i0,i1,i2) vcache_triangle_flags(vcache,flags,i0,i1,i2) +#define LINE(flags,i0,i1) vcache_line_flags(vcache,flags,i0,i1) +#define POINT(i0) vcache_point(vcache,i0) +#define LINE_ADJ(flags,a0,i0,i1,a1) \ + vcache_line_adj_flags(vcache,flags,a0,i0,i1,a1) +#define TRIANGLE_ADJ(flags,i0,a0,i1,a1,i2,a2) \ + vcache_triangle_adj_flags(vcache,flags,i0,a0,i1,a1,i2,a2) #define FUNC vcache_run_extras #include "draw_pt_vcache_tmp.h" -#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle(vc,i0,i1,i2) -#define QUAD(vc,i0,i1,i2,i3) vcache_quad(vc,i0,i1,i2,i3) -#define LINE(vc,flags,i0,i1) vcache_line(vc,i0,i1) -#define POINT(vc,i0) vcache_point(vc,i0) +#define TRIANGLE(flags,i0,i1,i2) vcache_triangle(vcache,i0,i1,i2) +#define LINE(flags,i0,i1) vcache_line(vcache,i0,i1) +#define POINT(i0) vcache_point(vcache,i0) +#define LINE_ADJ(flags,a0,i0,i1,a1) \ + vcache_line_adj(vcache,a0,i0,i1,a1) +#define TRIANGLE_ADJ(flags,i0,a0,i1,a1,i2,a2) \ + vcache_triangle_adj(vcache,i0,a0,i1,a1,i2,a2) #define FUNC vcache_run #include "draw_pt_vcache_tmp.h" @@ -540,7 +548,18 @@ vcache_prepare( struct draw_pt_front_end *frontend, * which is a separate issue. 
*/ vcache->input_prim = in_prim; - vcache->output_prim = u_reduced_prim(in_prim); + switch (in_prim) { + case PIPE_PRIM_LINES_ADJACENCY: + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + vcache->output_prim = PIPE_PRIM_LINES_ADJACENCY; + break; + case PIPE_PRIM_TRIANGLES_ADJACENCY: + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + vcache->output_prim = PIPE_PRIM_TRIANGLES_ADJACENCY; + break; + default: + vcache->output_prim = u_reduced_prim(in_prim); + } vcache->middle = middle; vcache->opt = opt; diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h index a42162691b..861ce1adaa 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h @@ -1,198 +1,19 @@ +#define FUNC_VARS \ + struct draw_pt_front_end *frontend, \ + pt_elt_func get_elt, \ + const void *elts, \ + int elt_bias, \ + unsigned count +#define LOCAL_VARS \ + struct vcache_frontend *vcache = (struct vcache_frontend *) frontend; \ + struct draw_context *draw = vcache->draw; \ + const unsigned prim = vcache->input_prim; \ + const boolean last_vertex_last = !(draw->rasterizer->flatshade && \ + draw->rasterizer->flatshade_first) -static void FUNC( struct draw_pt_front_end *frontend, - pt_elt_func get_elt, - const void *elts, - int elt_bias, - unsigned count ) -{ - struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; - struct draw_context *draw = vcache->draw; +#define GET_ELT(idx) (get_elt(elts, idx) + elt_bias) - boolean flatfirst = (draw->rasterizer->flatshade && - draw->rasterizer->flatshade_first); - unsigned i; - ushort flags; +#define FUNC_EXIT do { vcache_flush(vcache); } while (0) - if (0) debug_printf("%s %d\n", __FUNCTION__, count); - - - switch (vcache->input_prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < count; i ++) { - POINT( vcache, - get_elt(elts, i + 0) + elt_bias ); - } - break; - - case PIPE_PRIM_LINES: - for (i = 0; i+1 < count; i += 2) { - LINE( vcache, - 
DRAW_PIPE_RESET_STIPPLE, - get_elt(elts, i + 0) + elt_bias, - get_elt(elts, i + 1) + elt_bias); - } - break; - - case PIPE_PRIM_LINE_LOOP: - if (count >= 2) { - flags = DRAW_PIPE_RESET_STIPPLE; - - for (i = 1; i < count; i++, flags = 0) { - LINE( vcache, - flags, - get_elt(elts, i - 1) + elt_bias, - get_elt(elts, i ) + elt_bias); - } - - LINE( vcache, - flags, - get_elt(elts, i - 1) + elt_bias, - get_elt(elts, 0 ) + elt_bias); - } - break; - - case PIPE_PRIM_LINE_STRIP: - flags = DRAW_PIPE_RESET_STIPPLE; - for (i = 1; i < count; i++, flags = 0) { - LINE( vcache, - flags, - get_elt(elts, i - 1) + elt_bias, - get_elt(elts, i ) + elt_bias); - } - break; - - case PIPE_PRIM_TRIANGLES: - for (i = 0; i+2 < count; i += 3) { - TRIANGLE( vcache, - DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, i + 0) + elt_bias, - get_elt(elts, i + 1) + elt_bias, - get_elt(elts, i + 2 ) + elt_bias); - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - TRIANGLE( vcache, - DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, i + 0) + elt_bias, - get_elt(elts, i + 1 + (i&1)) + elt_bias, - get_elt(elts, i + 2 - (i&1)) + elt_bias); - } - } - else { - for (i = 0; i+2 < count; i++) { - TRIANGLE( vcache, - DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, i + 0 + (i&1)) + elt_bias, - get_elt(elts, i + 1 - (i&1)) + elt_bias, - get_elt(elts, i + 2 ) + elt_bias); - } - } - break; - - case PIPE_PRIM_TRIANGLE_FAN: - if (count >= 3) { - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - TRIANGLE( vcache, - DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, i + 1) + elt_bias, - get_elt(elts, i + 2) + elt_bias, - get_elt(elts, 0 ) + elt_bias); - } - } - else { - for (i = 0; i+2 < count; i++) { - TRIANGLE( vcache, - DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, 0) + elt_bias, - get_elt(elts, i + 1) + elt_bias, - get_elt(elts, i + 2 ) + elt_bias); - } - } - } - break; - - 
- case PIPE_PRIM_QUADS: - for (i = 0; i+3 < count; i += 4) { - QUAD( vcache, - get_elt(elts, i + 0) + elt_bias, - get_elt(elts, i + 1) + elt_bias, - get_elt(elts, i + 2) + elt_bias, - get_elt(elts, i + 3) + elt_bias ); - } - break; - - case PIPE_PRIM_QUAD_STRIP: - for (i = 0; i+3 < count; i += 2) { - QUAD( vcache, - get_elt(elts, i + 2) + elt_bias, - get_elt(elts, i + 0) + elt_bias, - get_elt(elts, i + 1) + elt_bias, - get_elt(elts, i + 3) + elt_bias ); - } - break; - - case PIPE_PRIM_POLYGON: - { - /* These bitflags look a little odd because we submit the - * vertices as (1,2,0) to satisfy flatshade requirements. - */ - ushort edge_next, edge_finish; - - if (flatfirst) { - flags = (DRAW_PIPE_RESET_STIPPLE | - DRAW_PIPE_EDGE_FLAG_0 | - DRAW_PIPE_EDGE_FLAG_1); - edge_next = DRAW_PIPE_EDGE_FLAG_1; - edge_finish = DRAW_PIPE_EDGE_FLAG_2; - } - else { - flags = (DRAW_PIPE_RESET_STIPPLE | - DRAW_PIPE_EDGE_FLAG_2 | - DRAW_PIPE_EDGE_FLAG_0); - edge_next = DRAW_PIPE_EDGE_FLAG_0; - edge_finish = DRAW_PIPE_EDGE_FLAG_1; - } - - for (i = 0; i+2 < count; i++, flags = edge_next) { - - if (i + 3 == count) - flags |= edge_finish; - - if (flatfirst) { - TRIANGLE( vcache, - flags, - get_elt(elts, 0) + elt_bias, - get_elt(elts, i + 1) + elt_bias, - get_elt(elts, i + 2) + elt_bias ); - } - else { - TRIANGLE( vcache, - flags, - get_elt(elts, i + 1) + elt_bias, - get_elt(elts, i + 2) + elt_bias, - get_elt(elts, 0) + elt_bias); - } - } - } - break; - - default: - assert(0); - break; - } - - vcache_flush( vcache ); -} - - -#undef TRIANGLE -#undef QUAD -#undef POINT -#undef LINE -#undef FUNC +#include "draw_decompose_tmp.h" -- cgit v1.2.3 From 23176779f88c5cff7365698f391194141e11e64c Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 2 Aug 2010 01:38:58 +0800 Subject: draw: Include draw_decompose_tmp.h in draw_gs_tmp.h. Use draw_decompose_tmp.h to replace GS primitive decomposer. 
--- src/gallium/auxiliary/draw/draw_gs.c | 24 +--- src/gallium/auxiliary/draw/draw_gs_tmp.h | 186 ++++++------------------------- 2 files changed, 39 insertions(+), 171 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index cff859a42b..f2535c5a54 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -373,28 +373,14 @@ static void gs_tri_adj(struct draw_geometry_shader *shader, gs_flush(shader, 1); } -#define TRIANGLE(gs,i0,i1,i2) gs_tri(gs,i0,i1,i2) -#define TRI_ADJ(gs,i0,i1,i2,i3,i4,i5) gs_tri_adj(gs,i0,i1,i2,i3,i4,i5) -#define LINE(gs,i0,i1) gs_line(gs,i0,i1) -#define LINE_ADJ(gs,i0,i1,i2,i3) gs_line_adj(gs,i0,i1,i2,i3) -#define POINT(gs,i0) gs_point(gs,i0) -#define FUNC gs_run -#define LOCAL_VARS +#define FUNC gs_run +#define GET_ELT(idx) (idx) #include "draw_gs_tmp.h" -#define TRIANGLE(gs,i0,i1,i2) gs_tri(gs,elts[i0],elts[i1],elts[i2]) -#define TRI_ADJ(gs,i0,i1,i2,i3,i4,i5) \ - gs_tri_adj(gs,elts[i0],elts[i1],elts[i2],elts[i3], \ - elts[i4],elts[i5]) -#define LINE(gs,i0,i1) gs_line(gs,elts[i0],elts[i1]) -#define LINE_ADJ(gs,i0,i1,i2,i3) gs_line_adj(gs,elts[i0], \ - elts[i1], \ - elts[i2],elts[i3]) -#define POINT(gs,i0) gs_point(gs,elts[i0]) -#define FUNC gs_run_elts -#define LOCAL_VARS \ - const ushort *elts = input_prims->elts; +#define FUNC gs_run_elts +#define LOCAL_VARS const ushort *elts = input_prims->elts; +#define GET_ELT(idx) (elts[idx]) #include "draw_gs_tmp.h" diff --git a/src/gallium/auxiliary/draw/draw_gs_tmp.h b/src/gallium/auxiliary/draw/draw_gs_tmp.h index 7a8683cf7c..4a17af0dea 100644 --- a/src/gallium/auxiliary/draw/draw_gs_tmp.h +++ b/src/gallium/auxiliary/draw/draw_gs_tmp.h @@ -1,152 +1,34 @@ - -static void FUNC( struct draw_geometry_shader *shader, - const struct draw_prim_info *input_prims, - const struct draw_vertex_info *input_verts, - struct draw_prim_info *output_prims, - struct draw_vertex_info *output_verts) -{ - 
struct draw_context *draw = shader->draw; - - boolean flatfirst = (draw->rasterizer->flatshade && - draw->rasterizer->flatshade_first); - unsigned i, j; - unsigned count = input_prims->count; - LOCAL_VARS - - if (0) debug_printf("%s %d\n", __FUNCTION__, count); - - debug_assert(input_prims->primitive_count == 1); - - switch (input_prims->prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < count; i++) { - POINT( shader, i + 0 ); - } - break; - - case PIPE_PRIM_LINES: - for (i = 0; i+1 < count; i += 2) { - LINE( shader , i + 0 , i + 1 ); - } - break; - - case PIPE_PRIM_LINE_LOOP: - if (count >= 2) { - - for (i = 1; i < count; i++) { - LINE( shader, i - 1, i ); - } - - LINE( shader, i - 1, 0 ); - } - break; - - case PIPE_PRIM_LINE_STRIP: - for (i = 1; i < count; i++) { - LINE( shader, i - 1, i ); - } - break; - - case PIPE_PRIM_TRIANGLES: - for (i = 0; i+2 < count; i += 3) { - TRIANGLE( shader, i + 0, i + 1, i + 2 ); - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - TRIANGLE( shader, - i + 0, - i + 1 + (i&1), - i + 2 - (i&1) ); - } - } - else { - for (i = 0; i+2 < count; i++) { - TRIANGLE( shader, - i + 0 + (i&1), - i + 1 - (i&1), - i + 2 ); - } - } - break; - - case PIPE_PRIM_TRIANGLE_FAN: - if (count >= 3) { - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - TRIANGLE( shader, - i + 1, - i + 2, - 0 ); - } - } - else { - for (i = 0; i+2 < count; i++) { - TRIANGLE( shader, - 0, - i + 1, - i + 2 ); - } - } - } - break; - - case PIPE_PRIM_POLYGON: - { - for (i = 0; i+2 < count; i++) { - - if (flatfirst) { - TRIANGLE( shader, 0, i + 1, i + 2 ); - } - else { - TRIANGLE( shader, i + 1, i + 2, 0 ); - } - } - } - break; - - case PIPE_PRIM_LINES_ADJACENCY: - for (i = 0; i+3 < count; i += 4) { - LINE_ADJ( shader , i + 0 , i + 1, i + 2, i + 3 ); - } - break; - case PIPE_PRIM_LINE_STRIP_ADJACENCY: - for (i = 1; i + 2 < count; i++) { - LINE_ADJ( shader, i - 1, i, i + 1, i + 2 ); - } - break; - - case 
PIPE_PRIM_TRIANGLES_ADJACENCY: - for (i = 0; i+5 < count; i += 5) { - TRI_ADJ( shader, i + 0, i + 1, i + 2, - i + 3, i + 4, i + 5); - } - break; - case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: - for (i = 0, j = 0; i+5 < count; i += 2, ++j) { - TRI_ADJ( shader, - i + 0, - i + 1 + 2*(j&1), - i + 2 + 2*(j&1), - i + 3 - 2*(j&1), - i + 4 - 2*(j&1), - i + 5); - } - break; - - default: - debug_assert(!"Unsupported primitive in geometry shader"); - break; - } -} - - -#undef TRIANGLE -#undef TRI_ADJ -#undef POINT -#undef LINE -#undef LINE_ADJ -#undef FUNC -#undef LOCAL_VARS +#define FUNC_VARS struct draw_geometry_shader *gs, \ + const struct draw_prim_info *input_prims, \ + const struct draw_vertex_info *input_verts, \ + struct draw_prim_info *output_prims, \ + struct draw_vertex_info *output_verts + +#define FUNC_ENTER \ + /* declare more local vars */ \ + struct draw_context *draw = gs->draw; \ + const unsigned prim = input_prims->prim; \ + const unsigned count = input_prims->count; \ + const boolean last_vertex_last = \ + !(draw->rasterizer->flatshade && \ + draw->rasterizer->flatshade_first); \ + do { \ + debug_assert(input_prims->primitive_count == 1); \ + switch (prim) { \ + case PIPE_PRIM_QUADS: \ + case PIPE_PRIM_QUAD_STRIP: \ + case PIPE_PRIM_POLYGON: \ + debug_assert(!"unexpected primitive type in GS"); \ + return; \ + default: \ + break; \ + } \ + } while (0) \ + +#define POINT(i0) gs_point(gs,i0) +#define LINE(flags,i0,i1) gs_line(gs,i0,i1) +#define TRIANGLE(flags,i0,i1,i2) gs_tri(gs,i0,i1,i2) +#define LINE_ADJ(flags,i0,i1,i2,i3) gs_line_adj(gs,i0,i1,i2,i3) +#define TRIANGLE_ADJ(flags,i0,i1,i2,i3,i4,i5) gs_tri_adj(gs,i0,i1,i2,i3,i4,i5) + +#include "draw_decompose_tmp.h" -- cgit v1.2.3 From 94d256591d83ac1330f7237e865784618d124d09 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 2 Aug 2010 01:38:58 +0800 Subject: draw: Include draw_decompose_tmp.h in draw_so_emit_tmp.h. Use draw_decompose_tmp.h to replace stream out primitive decomposer. 
--- src/gallium/auxiliary/draw/draw_pt_so_emit.c | 20 +--- src/gallium/auxiliary/draw/draw_so_emit_tmp.h | 156 ++++++-------------------- 2 files changed, 38 insertions(+), 138 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c index 5d82934889..c86bdd99a3 100644 --- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c @@ -218,25 +218,15 @@ static void so_tri(struct pt_so_emit *so, int i0, int i1, int i2) } -#define TRIANGLE(gs,i0,i1,i2) so_tri(so,i0,i1,i2) -#define LINE(gs,i0,i1) so_line(so,i0,i1) -#define POINT(gs,i0) so_point(so,i0) -#define FUNC so_run_linear -#define LOCAL_VARS +#define FUNC so_run_linear +#define GET_ELT(idx) (start + (idx)) #include "draw_so_emit_tmp.h" -#undef LOCAL_VARS -#undef FUNC -#define TRIANGLE(gs,i0,i1,i2) so_tri(gs,elts[i0],elts[i1],elts[i2]) -#define LINE(gs,i0,i1) so_line(gs,elts[i0],elts[i1]) -#define POINT(gs,i0) so_point(gs,elts[i0]) -#define FUNC so_run_elts -#define LOCAL_VARS \ - const ushort *elts = input_prims->elts; +#define FUNC so_run_elts +#define LOCAL_VARS const ushort *elts = input_prims->elts; +#define GET_ELT(idx) (elts[start + (idx)]) #include "draw_so_emit_tmp.h" -#undef LOCAL_VARS -#undef FUNC void draw_pt_so_emit( struct pt_so_emit *emit, diff --git a/src/gallium/auxiliary/draw/draw_so_emit_tmp.h b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h index 01212a8e53..6d8937a0b4 100644 --- a/src/gallium/auxiliary/draw/draw_so_emit_tmp.h +++ b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h @@ -1,123 +1,33 @@ - -static void FUNC( struct pt_so_emit *so, - const struct draw_prim_info *input_prims, - const struct draw_vertex_info *input_verts, - unsigned start, - unsigned count) -{ - struct draw_context *draw = so->draw; - - boolean flatfirst = (draw->rasterizer->flatshade && - draw->rasterizer->flatshade_first); - unsigned i; - LOCAL_VARS - - if (0) debug_printf("%s %d\n", 
__FUNCTION__, count); - - debug_assert(input_prims->primitive_count == 1); - - switch (input_prims->prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < count; i++) { - POINT( so, start + i + 0 ); - } - break; - - case PIPE_PRIM_LINES: - for (i = 0; i+1 < count; i += 2) { - LINE( so , start + i + 0 , start + i + 1 ); - } - break; - - case PIPE_PRIM_LINE_LOOP: - if (count >= 2) { - - for (i = 1; i < count; i++) { - LINE( so, start + i - 1, start + i ); - } - - LINE( so, start + i - 1, start ); - } - break; - - case PIPE_PRIM_LINE_STRIP: - for (i = 1; i < count; i++) { - LINE( so, start + i - 1, start + i ); - } - break; - - case PIPE_PRIM_TRIANGLES: - for (i = 0; i+2 < count; i += 3) { - TRIANGLE( so, start + i + 0, start + i + 1, start + i + 2 ); - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - TRIANGLE( so, - start + i + 0, - start + i + 1 + (i&1), - start + i + 2 - (i&1) ); - } - } - else { - for (i = 0; i+2 < count; i++) { - TRIANGLE( so, - start + i + 0 + (i&1), - start + i + 1 - (i&1), - start + i + 2 ); - } - } - break; - - case PIPE_PRIM_TRIANGLE_FAN: - if (count >= 3) { - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - TRIANGLE( so, - start + i + 1, - start + i + 2, - start ); - } - } - else { - for (i = 0; i+2 < count; i++) { - TRIANGLE( so, - start, - start + i + 1, - start + i + 2 ); - } - } - } - break; - - case PIPE_PRIM_POLYGON: - { - /* These bitflags look a little odd because we submit the - * vertices as (1,2,0) to satisfy flatshade requirements. 
- */ - - for (i = 0; i+2 < count; i++) { - - if (flatfirst) { - TRIANGLE( so, start + 0, start + i + 1, start + i + 2 ); - } - else { - TRIANGLE( so, start + i + 1, start + i + 2, start + 0 ); - } - } - } - break; - - default: - debug_assert(!"Unsupported primitive in stream output"); - break; - } -} - - -#undef TRIANGLE -#undef POINT -#undef LINE -#undef FUNC +#define FUNC_VARS \ + struct pt_so_emit *so, \ + const struct draw_prim_info *input_prims, \ + const struct draw_vertex_info *input_verts, \ + unsigned start, \ + unsigned count + +#define FUNC_ENTER \ + /* declare more local vars */ \ + struct draw_context *draw = so->draw; \ + const unsigned prim = input_prims->prim; \ + const boolean last_vertex_last = \ + !(draw->rasterizer->flatshade && \ + draw->rasterizer->flatshade_first); \ + do { \ + debug_assert(input_prims->primitive_count == 1); \ + switch (prim) { \ + case PIPE_PRIM_LINES_ADJACENCY: \ + case PIPE_PRIM_LINE_STRIP_ADJACENCY: \ + case PIPE_PRIM_TRIANGLES_ADJACENCY: \ + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: \ + debug_assert(!"unexpected primitive type in stream output"); \ + return; \ + default: \ + break; \ + } \ + } while (0) \ + +#define POINT(i0) so_point(so,i0) +#define LINE(flags,i0,i1) so_line(so,i0,i1) +#define TRIANGLE(flags,i0,i1,i2) so_tri(so,i0,i1,i2) + +#include "draw_decompose_tmp.h" -- cgit v1.2.3 From eb3c6ddafb7a1b544243e9dec991cc24d16940ea Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 2 Aug 2010 01:38:58 +0800 Subject: draw: Include draw_decompose_tmp.h in draw_pt_decompose.h. Use draw_decompose_tmp.h to replace pipeline primitive decomposer. 
--- src/gallium/auxiliary/draw/draw_pipe.c | 133 ++++------------- src/gallium/auxiliary/draw/draw_pt_decompose.h | 199 +------------------------ 2 files changed, 36 insertions(+), 296 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 144f10a5d0..3421b826d9 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -169,77 +169,41 @@ static void do_triangle( struct draw_context *draw, /* * Set up macros for draw_pt_decompose.h template code. * This code uses vertex indexes / elements. + * + * Flags are needed by the stipple and unfilled stages. When the two stages + * are active, vcache_run_extras is called and the flags are stored in the + * higher bits of i0. Otherwise, flags do not matter. */ -/* emit first quad vertex as first vertex in triangles */ -#define QUAD_FIRST_PV(i0,i1,i2,i3) \ - do_triangle( draw, \ - ( DRAW_PIPE_RESET_STIPPLE | \ - DRAW_PIPE_EDGE_FLAG_0 | \ - DRAW_PIPE_EDGE_FLAG_1 ), \ - verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK)); \ - do_triangle( draw, \ - ( DRAW_PIPE_EDGE_FLAG_1 | \ - DRAW_PIPE_EDGE_FLAG_2 ), \ - verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)) - -/* emit last quad vertex as last vertex in triangles */ -#define QUAD_LAST_PV(i0,i1,i2,i3) \ - do_triangle( draw, \ - ( DRAW_PIPE_RESET_STIPPLE | \ - DRAW_PIPE_EDGE_FLAG_0 | \ - DRAW_PIPE_EDGE_FLAG_2 ), \ - verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)); \ - do_triangle( draw, \ - ( DRAW_PIPE_EDGE_FLAG_0 | \ - DRAW_PIPE_EDGE_FLAG_1 ), \ - verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * 
(elts[i2] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)) - -#define TRIANGLE(flags,i0,i1,i2) \ - do_triangle( draw, \ - elts[i0], /* flags */ \ - verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK) ); - -#define LINE(flags,i0,i1) \ - do_line( draw, \ - elts[i0], \ - verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK) ); +#define TRIANGLE(flags,i0,i1,i2) \ + do_triangle( draw, \ + i0, /* flags */ \ + verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (i1 & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (i2 & ~DRAW_PIPE_FLAG_MASK) ) + +#define LINE(flags,i0,i1) \ + do_line( draw, \ + i0, /* flags */ \ + verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (i1 & ~DRAW_PIPE_FLAG_MASK) ) #define POINT(i0) \ do_point( draw, \ - verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK) ) + verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK) ) + +#define GET_ELT(idx) (elts[idx]) #define FUNC pipe_run_elts -#define ARGS \ +#define FUNC_VARS \ struct draw_context *draw, \ unsigned prim, \ struct vertex_header *vertices, \ unsigned stride, \ - const ushort *elts - -#define LOCAL_VARS \ - char *verts = (char *)vertices; \ - boolean flatfirst = (draw->rasterizer->flatshade && \ - draw->rasterizer->flatshade_first); \ - unsigned i; \ - ushort flags - -#define FLUSH + const ushort *elts, \ + unsigned count #include "draw_pt_decompose.h" -#undef ARGS -#undef LOCAL_VARS @@ -304,38 +268,6 @@ void draw_pipeline_run( struct draw_context *draw, * This code is for non-indexed (aka linear) rendering (no elts). 
*/ -/* emit first quad vertex as first vertex in triangles */ -#define QUAD_FIRST_PV(i0,i1,i2,i3) \ - do_triangle( draw, \ - ( DRAW_PIPE_RESET_STIPPLE | \ - DRAW_PIPE_EDGE_FLAG_0 | \ - DRAW_PIPE_EDGE_FLAG_1 ), \ - verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK)); \ - do_triangle( draw, \ - ( DRAW_PIPE_EDGE_FLAG_1 | \ - DRAW_PIPE_EDGE_FLAG_2 ), \ - verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)) - -/* emit last quad vertex as last vertex in triangles */ -#define QUAD_LAST_PV(i0,i1,i2,i3) \ - do_triangle( draw, \ - ( DRAW_PIPE_RESET_STIPPLE | \ - DRAW_PIPE_EDGE_FLAG_0 | \ - DRAW_PIPE_EDGE_FLAG_2 ), \ - verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)); \ - do_triangle( draw, \ - ( DRAW_PIPE_EDGE_FLAG_0 | \ - DRAW_PIPE_EDGE_FLAG_1 ), \ - verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)) - #define TRIANGLE(flags,i0,i1,i2) \ do_triangle( draw, \ flags, /* flags */ \ @@ -353,21 +285,16 @@ void draw_pipeline_run( struct draw_context *draw, do_point( draw, \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK) ) -#define FUNC pipe_run_linear -#define ARGS \ - struct draw_context *draw, \ - unsigned prim, \ - struct vertex_header *vertices, \ - unsigned stride -#define LOCAL_VARS \ - char *verts = (char *)vertices; \ - boolean flatfirst = (draw->rasterizer->flatshade && \ - draw->rasterizer->flatshade_first); \ - unsigned i; \ - ushort flags +#define GET_ELT(idx) (idx) -#define FLUSH +#define FUNC pipe_run_linear +#define FUNC_VARS \ + struct draw_context *draw, \ + unsigned prim, \ + struct vertex_header *vertices, \ + unsigned stride, \ + unsigned count #include 
"draw_pt_decompose.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_decompose.h b/src/gallium/auxiliary/draw/draw_pt_decompose.h index 52f9593d46..e7ae9c4449 100644 --- a/src/gallium/auxiliary/draw/draw_pt_decompose.h +++ b/src/gallium/auxiliary/draw/draw_pt_decompose.h @@ -1,194 +1,7 @@ +#define LOCAL_VARS \ + char *verts = (char *) vertices; \ + const boolean last_vertex_last = \ + !(draw->rasterizer->flatshade && \ + draw->rasterizer->flatshade_first) - -static void FUNC( ARGS, - unsigned count ) -{ - LOCAL_VARS; - - switch (prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < count; i ++) { - POINT( (i + 0) ); - } - break; - - case PIPE_PRIM_LINES: - for (i = 0; i+1 < count; i += 2) { - LINE( DRAW_PIPE_RESET_STIPPLE, - (i + 0), - (i + 1)); - } - break; - - case PIPE_PRIM_LINE_LOOP: - if (count >= 2) { - flags = DRAW_PIPE_RESET_STIPPLE; - - for (i = 1; i < count; i++, flags = 0) { - LINE( flags, - (i - 1), - (i )); - } - - LINE( flags, - (i - 1), - (0 )); - } - break; - - case PIPE_PRIM_LINE_STRIP: - flags = DRAW_PIPE_RESET_STIPPLE; - for (i = 1; i < count; i++, flags = 0) { - LINE( flags, - (i - 1), - (i )); - } - break; - - case PIPE_PRIM_TRIANGLES: - for (i = 0; i+2 < count; i += 3) { - TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - (i + 0), - (i + 1), - (i + 2 )); - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - /* Emit first triangle vertex as first triangle vertex */ - TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - (i + 0), - (i + 1 + (i&1)), - (i + 2 - (i&1)) ); - } - } - else { - for (i = 0; i+2 < count; i++) { - /* Emit last triangle vertex as last triangle vertex */ - TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - (i + 0 + (i&1)), - (i + 1 - (i&1)), - (i + 2 )); - } - } - break; - - case PIPE_PRIM_TRIANGLE_FAN: - if (count >= 3) { - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - (i + 
1), - (i + 2), - 0 ); - } - } - else { - for (i = 0; i+2 < count; i++) { - TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - (0), - (i + 1), - (i + 2 )); - } - } - } - break; - - - case PIPE_PRIM_QUADS: - /* GL quads don't follow provoking vertex convention */ - if (flatfirst) { - for (i = 0; i+3 < count; i += 4) { - /* emit last quad vertex as first triangle vertex */ - QUAD_FIRST_PV( (i + 3), - (i + 0), - (i + 1), - (i + 2) ); - } - } - else { - for (i = 0; i+3 < count; i += 4) { - /* emit last quad vertex as last triangle vertex */ - QUAD_LAST_PV( (i + 0), - (i + 1), - (i + 2), - (i + 3) ); - } - } - break; - - case PIPE_PRIM_QUAD_STRIP: - /* GL quad strips don't follow provoking vertex convention */ - if (flatfirst) { - for (i = 0; i+3 < count; i += 2) { - /* emit last quad vertex as first triangle vertex */ - QUAD_FIRST_PV( (i + 3), - (i + 2), - (i + 0), - (i + 1) ); - - } - } - else { - for (i = 0; i+3 < count; i += 2) { - /* emit last quad vertex as last triangle vertex */ - QUAD_LAST_PV( (i + 2), - (i + 0), - (i + 1), - (i + 3) ); - } - } - break; - - case PIPE_PRIM_POLYGON: - /* GL polygons don't follow provoking vertex convention */ - { - /* These bitflags look a little odd because we submit the - * vertices as (1,2,0) to satisfy flatshade requirements. 
- */ - const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2; - const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0; - const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1; - - flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; - - for (i = 0; i+2 < count; i++, flags = edge_middle) { - - if (i + 3 == count) - flags |= edge_last; - - if (flatfirst) { - /* emit first polygon vertex as first triangle vertex */ - TRIANGLE( flags, - (0), - (i + 1), - (i + 2) ); - } - else { - /* emit first polygon vertex as last triangle vertex */ - TRIANGLE( flags, - (i + 1), - (i + 2), - (0)); - } - } - } - break; - - default: - assert(0); - break; - } - - FLUSH; -} - - -#undef TRIANGLE -#undef QUAD_FIRST_PV -#undef QUAD_LAST_PV -#undef POINT -#undef LINE -#undef FUNC +#include "draw_decompose_tmp.h" -- cgit v1.2.3 From f1fc444bb05421404f42e8d08e9085c6fb6ce7a9 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Wed, 4 Aug 2010 14:39:16 +0800 Subject: draw: Mask out vertex flags in GS and stream output. This fixes out-of-bound access to the vertices. 
--- src/gallium/auxiliary/draw/draw_gs.c | 2 +- src/gallium/auxiliary/draw/draw_pt_so_emit.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index f2535c5a54..4a1013e79a 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -380,7 +380,7 @@ static void gs_tri_adj(struct draw_geometry_shader *shader, #define FUNC gs_run_elts #define LOCAL_VARS const ushort *elts = input_prims->elts; -#define GET_ELT(idx) (elts[idx]) +#define GET_ELT(idx) (elts[idx] & ~DRAW_PIPE_FLAG_MASK) #include "draw_gs_tmp.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c index c86bdd99a3..f7f4f24d35 100644 --- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c @@ -225,7 +225,7 @@ static void so_tri(struct pt_so_emit *so, int i0, int i1, int i2) #define FUNC so_run_elts #define LOCAL_VARS const ushort *elts = input_prims->elts; -#define GET_ELT(idx) (elts[start + (idx)]) +#define GET_ELT(idx) (elts[start + (idx)] & ~DRAW_PIPE_FLAG_MASK) #include "draw_so_emit_tmp.h" -- cgit v1.2.3 From 642d5ba79abc6a231a5fdabb3454b9b082b0d7f8 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Wed, 4 Aug 2010 14:37:40 +0800 Subject: draw: Remove unnecessary vertex flag ANDs. Vertex flags are a contract between vcache and the pipeline. They are set only for the first vertex of a primitive. 
--- src/gallium/auxiliary/draw/draw_pipe.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 3421b826d9..070ac803c8 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -179,18 +179,17 @@ static void do_triangle( struct draw_context *draw, do_triangle( draw, \ i0, /* flags */ \ verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1 & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i2 & ~DRAW_PIPE_FLAG_MASK) ) + verts + stride * (i1), \ + verts + stride * (i2) ) #define LINE(flags,i0,i1) \ do_line( draw, \ i0, /* flags */ \ verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1 & ~DRAW_PIPE_FLAG_MASK) ) + verts + stride * (i1) ) #define POINT(i0) \ - do_point( draw, \ - verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK) ) + do_point( draw, verts + stride * (i0) ) #define GET_ELT(idx) (elts[idx]) @@ -268,22 +267,19 @@ void draw_pipeline_run( struct draw_context *draw, * This code is for non-indexed (aka linear) rendering (no elts). 
*/ -#define TRIANGLE(flags,i0,i1,i2) \ - do_triangle( draw, \ - flags, /* flags */ \ - verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK)) +#define TRIANGLE(flags,i0,i1,i2) \ + do_triangle( draw, flags, \ + verts + stride * (i0), \ + verts + stride * (i1), \ + verts + stride * (i2) ) -#define LINE(flags,i0,i1) \ - do_line( draw, \ - flags, \ - verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK)) +#define LINE(flags,i0,i1) \ + do_line( draw, flags, \ + verts + stride * (i0), \ + verts + stride * (i1) ) -#define POINT(i0) \ - do_point( draw, \ - verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK) ) +#define POINT(i0) \ + do_point( draw, verts + stride * (i0) ) #define GET_ELT(idx) (idx) -- cgit v1.2.3 From 5f6ab5e259de826bb3795d90fdb0235c8997acb9 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 6 Aug 2010 15:29:50 +1000 Subject: r600g: start to fix up multiple targets. fixup exports from pixel shader for multi-cbs + depth buffer writing. 
Still crashes GPU running any of the multi-buffer or depth writing --- src/gallium/drivers/r600/r600_context.h | 2 +- src/gallium/drivers/r600/r600_shader.c | 18 +++++++++++-- src/gallium/drivers/r600/r600_state.c | 47 +++++++++++++++++++++++---------- 3 files changed, 50 insertions(+), 17 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index 78da88fef5..c83949de42 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -94,7 +94,7 @@ struct r600_context_hw_states { struct radeon_state dsa; struct radeon_state blend; struct radeon_state viewport; - struct radeon_state cb0; + struct radeon_state cb[7]; struct radeon_state config; struct radeon_state cb_cntl; struct radeon_state db; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index f38aa7b463..d925dcbe4b 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -158,7 +158,7 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_shader *rshader = &rpshader->shader; struct radeon_state *state; - unsigned i, tmp; + unsigned i, tmp, exports_ps, num_cout; int r; r = radeon_state_init(&rpshader->rstate, rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER); @@ -174,11 +174,22 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta } state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp; } + + exports_ps = 0; + num_cout = 0; + for (i = 0; i < rshader->noutput; i++) { + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) + exports_ps |= 1; + else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { + exports_ps |= (1 << (num_cout+1)); + num_cout++; + } + } state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) | 
S_0286CC_PERSP_GRADIENT_ENA(1); state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr); - state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = 0x00000002; + state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps; rpshader->rstate.bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); rpshader->rstate.nbo = 1; rpshader->rstate.placement[0] = RADEON_GEM_DOMAIN_GTT; @@ -431,6 +442,9 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { output.array_base = 0; output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { + output.array_base = 61; + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else { R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); r = -EINVAL; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 1a8ec48936..c5e74d1efc 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -660,24 +660,25 @@ static int r600_blend(struct r600_context *rctx, struct radeon_state *rstate) return radeon_state_pm4(rstate); } -static int r600_cb0(struct r600_context *rctx, struct radeon_state *rstate) +static int r600_cb(struct r600_context *rctx, struct radeon_state *rstate, int cb) { struct r600_screen *rscreen = rctx->screen; struct r600_resource_texture *rtex; struct r600_resource *rbuffer; const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer; - unsigned level = state->cbufs[0]->level; + unsigned level = state->cbufs[cb]->level; unsigned pitch, slice; unsigned color_info; unsigned format, swap, ntype; int r; const struct util_format_description *desc; + int id = R600_CB0 + cb; - r = radeon_state_init(rstate, rscreen->rw, R600_CB0_TYPE, R600_CB0); + r = radeon_state_init(rstate, 
rscreen->rw, R600_CB0_TYPE, id); if (r) return r; - rtex = (struct r600_resource_texture*)state->cbufs[0]->texture; + rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; rbuffer = &rtex->resource; rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); rstate->bo[1] = radeon_bo_incref(rscreen->rw, rbuffer->bo); @@ -687,7 +688,7 @@ static int r600_cb0(struct r600_context *rctx, struct radeon_state *rstate) rstate->placement[4] = RADEON_GEM_DOMAIN_GTT; rstate->nbo = 3; pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[0]->height / 64 - 1; + slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1; ntype = 0; desc = util_format_description(rtex->resource.base.b.format); @@ -878,14 +879,20 @@ static int r600_dsa(struct r600_context *rctx, struct radeon_state *rstate) const struct pipe_depth_stencil_alpha_state *state = &rctx->dsa->state.dsa; const struct pipe_stencil_ref *stencil_ref = &rctx->stencil_ref->state.stencil_ref; struct r600_screen *rscreen = rctx->screen; - unsigned db_depth_control, alpha_test_control, alpha_ref; + unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control; unsigned stencil_ref_mask, stencil_ref_mask_bf; - int r; + int r, i; + struct r600_shader *rshader = &rctx->ps_shader->shader; r = radeon_state_init(rstate, rscreen->rw, R600_DSA_TYPE, R600_DSA); if (r) return r; + db_shader_control = 0x210; + for (i = 0; i < rshader->noutput; i++) { + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) + db_shader_control |= 1; + } stencil_ref_mask = 0; stencil_ref_mask_bf = 0; db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | @@ -933,7 +940,7 @@ static int r600_dsa(struct r600_context *rctx, struct radeon_state *rstate) rstate->states[R600_DSA__SPI_FOG_FUNC_BIAS] = 0x00000000; rstate->states[R600_DSA__SPI_FOG_CNTL] = 0x00000000; rstate->states[R600_DSA__DB_DEPTH_CONTROL] = db_depth_control; - 
rstate->states[R600_DSA__DB_SHADER_CONTROL] = 0x00000210; + rstate->states[R600_DSA__DB_SHADER_CONTROL] = db_shader_control; rstate->states[R600_DSA__DB_RENDER_CONTROL] = 0x00000060; rstate->states[R600_DSA__DB_RENDER_OVERRIDE] = 0x0000002A; rstate->states[R600_DSA__DB_SRESULTS_COMPARE_STATE1] = 0x00000000; @@ -1159,12 +1166,18 @@ static int r600_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate) { struct r600_screen *rscreen = rctx->screen; const struct pipe_blend_state *pbs = &rctx->blend->state.blend; - uint32_t color_control, target_mask; + int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs; + uint32_t color_control, target_mask, shader_mask; int i, r; target_mask = 0; + shader_mask = 0; color_control = S_028808_PER_MRT_BLEND(1); + for (i = 0; i < nr_cbufs; i++) { + shader_mask |= 0xf << i; + } + if (pbs->logicop_enable) { color_control |= (pbs->logicop_func) << 16; } else @@ -1175,11 +1188,13 @@ static int r600_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate) color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); } target_mask |= (pbs->rt[i].colormask << (4 * i)); + } r = radeon_state_init(rstate, rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); if (r) return r; - rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = 0x0000000F; + + rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = shader_mask; rstate->states[R600_CB_CNTL__CB_TARGET_MASK] = target_mask; rstate->states[R600_CB_CNTL__CB_COLOR_CONTROL] = color_control; rstate->states[R600_CB_CNTL__PA_SC_AA_CONFIG] = 0x00000000; @@ -1197,6 +1212,7 @@ int r600_context_hw_states(struct r600_context *rctx) { unsigned i; int r; + int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs; /* free previous TODO determine what need to be updated, what * doesn't @@ -1210,7 +1226,8 @@ int r600_context_hw_states(struct r600_context *rctx) r600_dsa(rctx, &rctx->hw_states.dsa); r600_blend(rctx, &rctx->hw_states.blend); r600_viewport(rctx, &rctx->hw_states.viewport); - r600_cb0(rctx, 
&rctx->hw_states.cb0); + for (i = 0; i < nr_cbufs; i++) + r600_cb(rctx, &rctx->hw_states.cb[i], i); r600_db(rctx, &rctx->hw_states.db); r600_cb_cntl(rctx, &rctx->hw_states.cb_cntl); @@ -1250,9 +1267,11 @@ int r600_context_hw_states(struct r600_context *rctx) r = radeon_draw_set(&rctx->draw, &rctx->hw_states.viewport); if (r) return r; - r = radeon_draw_set(&rctx->draw, &rctx->hw_states.cb0); - if (r) - return r; + for (i = 0; i < nr_cbufs; i++) { + r = radeon_draw_set(&rctx->draw, &rctx->hw_states.cb[i]); + if (r) + return r; + } r = radeon_draw_set(&rctx->draw, &rctx->hw_states.config); if (r) return r; -- cgit v1.2.3 From a6859f5cccbec9f01d1ceea7f8ba055b787ff299 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Thu, 5 Aug 2010 23:58:00 -0700 Subject: r300g: Remove unnecessary headers. --- src/gallium/drivers/r300/r300_hyperz.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index e719342a46..10e440ce30 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -27,8 +27,6 @@ #include "r300_hyperz.h" #include "r300_reg.h" #include "r300_fs.h" -#include "r300_emit.h" -#include "r300_texture.h" /* HiZ rules - taken from various docs -- cgit v1.2.3 From d38afcd2f286e924e0f9b7f484712ac19e3f98fc Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 6 Aug 2010 16:57:04 +0800 Subject: draw: Avoid mixed declarations and code. Do not expand LOCAL_VARS to void expression. Otherwise, declarations and code will be mixed when more variables are declared in FUNC_ENTER. This fixes fdo bug #29416. 
--- src/gallium/auxiliary/draw/draw_decompose_tmp.h | 4 ++-- src/gallium/auxiliary/draw/draw_pt_decompose.h | 2 +- src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_decompose_tmp.h b/src/gallium/auxiliary/draw/draw_decompose_tmp.h index cb25202323..a52d2b5058 100644 --- a/src/gallium/auxiliary/draw/draw_decompose_tmp.h +++ b/src/gallium/auxiliary/draw/draw_decompose_tmp.h @@ -30,7 +30,7 @@ /* these macros are optional */ #ifndef LOCAL_VARS -#define LOCAL_VARS do {} while (0) +#define LOCAL_VARS #endif #ifndef FUNC_ENTER #define FUNC_ENTER do {} while (0) @@ -50,7 +50,7 @@ FUNC(FUNC_VARS) { unsigned idx[6], i; ushort flags; - LOCAL_VARS; + LOCAL_VARS FUNC_ENTER; diff --git a/src/gallium/auxiliary/draw/draw_pt_decompose.h b/src/gallium/auxiliary/draw/draw_pt_decompose.h index e7ae9c4449..3127aad731 100644 --- a/src/gallium/auxiliary/draw/draw_pt_decompose.h +++ b/src/gallium/auxiliary/draw/draw_pt_decompose.h @@ -2,6 +2,6 @@ char *verts = (char *) vertices; \ const boolean last_vertex_last = \ !(draw->rasterizer->flatshade && \ - draw->rasterizer->flatshade_first) + draw->rasterizer->flatshade_first); #include "draw_decompose_tmp.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h index 861ce1adaa..1a3748d5f0 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h @@ -10,7 +10,7 @@ struct draw_context *draw = vcache->draw; \ const unsigned prim = vcache->input_prim; \ const boolean last_vertex_last = !(draw->rasterizer->flatshade && \ - draw->rasterizer->flatshade_first) + draw->rasterizer->flatshade_first); #define GET_ELT(idx) (get_elt(elts, idx) + elt_bias) -- cgit v1.2.3 From 6e6103004c9c737297b842a4aff298da920e7c33 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 6 Aug 2010 10:59:27 -0400 Subject: Revert "r600g: 
don't use dynamic state allocation for states" This reverts commit 9c949d4a4dd43b7889e13bdf683bcf211f049ced. Conflicts: src/gallium/drivers/r600/r600_context.h src/gallium/drivers/r600/r600_draw.c src/gallium/drivers/r600/r600_shader.c src/gallium/drivers/r600/r600_state.c --- src/gallium/drivers/r600/r600_context.c | 141 +++++++------- src/gallium/drivers/r600/r600_context.h | 29 ++- src/gallium/drivers/r600/r600_draw.c | 104 +++++------ src/gallium/drivers/r600/r600_shader.c | 34 ++-- src/gallium/drivers/r600/r600_state.c | 283 ++++++++++++++++++----------- src/gallium/drivers/r600/radeon.h | 52 ++---- src/gallium/targets/dri-r600/Makefile | 4 +- src/gallium/winsys/r600/drm/radeon.c | 11 ++ src/gallium/winsys/r600/drm/radeon_ctx.c | 160 +++++++++++----- src/gallium/winsys/r600/drm/radeon_draw.c | 92 +++++++++- src/gallium/winsys/r600/drm/radeon_priv.h | 29 +++ src/gallium/winsys/r600/drm/radeon_state.c | 70 ++++++- 12 files changed, 647 insertions(+), 362 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index f7732d8952..ae1780a1d4 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -48,14 +48,18 @@ void r600_flush(struct pipe_context *ctx, unsigned flags, struct r600_screen *rscreen = rctx->screen; static int dc = 0; - if (radeon_ctx_pm4(&rctx->ctx)) + if (radeon_ctx_pm4(rctx->ctx)) return; /* FIXME dumping should be removed once shader support instructions * without throwing bad code */ if (!dc) - radeon_ctx_dump_bof(&rctx->ctx, "gallium.bof"); - radeon_ctx_submit(&rctx->ctx); + radeon_ctx_dump_bof(rctx->ctx, "gallium.bof"); +#if 1 + radeon_ctx_submit(rctx->ctx); +#endif + rctx->ctx = radeon_ctx_decref(rctx->ctx); + rctx->ctx = radeon_ctx(rscreen->rw); dc++; } @@ -216,8 +220,9 @@ static void r600_init_config(struct r600_context *rctx) printf("num_gs_stack_entries : %d\n", num_gs_stack_entries); printf("num_es_stack_entries : 
%d\n", num_es_stack_entries); - radeon_state_init(&rctx->config, rctx->rw, R600_CONFIG_TYPE, R600_CONFIG); - rctx->config.states[R600_CONFIG__SQ_CONFIG] = 0x00000000; + rctx->hw_states.config = radeon_state(rctx->rw, R600_CONFIG_TYPE, R600_CONFIG); + + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] = 0x00000000; switch (family) { case CHIP_RV610: case CHIP_RV620: @@ -226,75 +231,75 @@ static void r600_init_config(struct r600_context *rctx) case CHIP_RV710: break; default: - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1); break; } - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_DX9_CONSTS(1); - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ALU_INST_PREFER_VECTOR(1); - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio); - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio); - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio); - rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_DX9_CONSTS(1); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ALU_INST_PREFER_VECTOR(1); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio); + rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio); - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0; - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs); - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs); - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= 
S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0; + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs); + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs); + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0; - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs); - rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_es_gprs); + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0; + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs); + rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_es_gprs); - rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0; - rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_PS_THREADS(num_ps_threads); - rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_VS_THREADS(num_vs_threads); - rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_GS_THREADS(num_gs_threads); - rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_ES_THREADS(num_es_threads); + rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0; + rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_PS_THREADS(num_ps_threads); + rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_VS_THREADS(num_vs_threads); + rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_GS_THREADS(num_gs_threads); + rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= 
S_008C0C_NUM_ES_THREADS(num_es_threads); - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0; - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); + rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0; + rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); + rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0; - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); - rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); + rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0; + rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); + rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); - rctx->config.states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000; - rctx->config.states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002; - rctx->config.states[R600_CONFIG__VC_ENHANCE] = 0x00000000; - rctx->config.states[R600_CONFIG__DB_DEBUG] = 0x00000000; - rctx->config.states[R600_CONFIG__DB_WATERMARKS] = 0x00420204; - rctx->config.states[R600_CONFIG__SX_MISC] = 0x00000000; - rctx->config.states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001; - rctx->config.states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003; - rctx->config.states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000; - 
rctx->config.states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_GS_MODE] = 0x00000000; - rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000; - rctx->config.states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001; - rctx->config.states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000; - rctx->config.states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000; - radeon_state_pm4(&rctx->config); + rctx->hw_states.config->states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000; + rctx->hw_states.config->states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002; + 
rctx->hw_states.config->states[R600_CONFIG__VC_ENHANCE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__DB_DEBUG] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__DB_WATERMARKS] = 0x00420204; + rctx->hw_states.config->states[R600_CONFIG__SX_MISC] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001; + rctx->hw_states.config->states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003; + rctx->hw_states.config->states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 
0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_GS_MODE] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000; + rctx->hw_states.config->states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001; + rctx->hw_states.config->states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000; + rctx->hw_states.config->states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000; + radeon_state_pm4(rctx->hw_states.config); } struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) @@ -328,7 +333,7 @@ struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) r600_init_config(rctx); - radeon_ctx_init(&rctx->ctx, rscreen->rw); - radeon_draw_init(&rctx->draw, rscreen->rw); + rctx->ctx = radeon_ctx(rscreen->rw); + rctx->draw = radeon_draw(rscreen->rw); return &rctx->context; } diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index c83949de42..431f8951b2 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -76,7 +76,7 @@ struct r600_context_state { union pipe_states state; unsigned refcount; unsigned type; - struct radeon_state rstate; + struct radeon_state *rstate; struct r600_shader shader; struct radeon_bo *bo; }; @@ -89,28 +89,28 @@ struct r600_vertex_element }; struct r600_context_hw_states { - struct radeon_state rasterizer; - struct radeon_state scissor; - struct radeon_state dsa; - struct radeon_state blend; - struct radeon_state viewport; - struct radeon_state cb[7]; - struct radeon_state config; - struct radeon_state cb_cntl; - struct radeon_state db; + struct radeon_state *rasterizer; + struct radeon_state *scissor; + struct radeon_state *dsa; + struct 
radeon_state *blend; + struct radeon_state *viewport; + struct radeon_state *cb[7]; + struct radeon_state *config; + struct radeon_state *cb_cntl; + struct radeon_state *db; unsigned ps_nresource; unsigned ps_nsampler; - struct radeon_state ps_resource[160]; - struct radeon_state ps_sampler[16]; + struct radeon_state *ps_resource[160]; + struct radeon_state *ps_sampler[16]; }; struct r600_context { struct pipe_context context; struct r600_screen *screen; struct radeon *rw; - struct radeon_ctx ctx; + struct radeon_ctx *ctx; struct blitter_context *blitter; - struct radeon_draw draw; + struct radeon_draw *draw; /* hw states */ struct r600_context_hw_states hw_states; /* pipe states */ @@ -120,7 +120,6 @@ struct r600_context { unsigned ps_nsampler_view; unsigned vs_nsampler_view; unsigned nvertex_buffer; - struct radeon_state config; struct r600_context_state *rasterizer; struct r600_context_state *poly_stipple; struct r600_context_state *scissor; diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c index 3a54cee2d9..2420b76318 100644 --- a/src/gallium/drivers/r600/r600_draw.c +++ b/src/gallium/drivers/r600/r600_draw.c @@ -38,8 +38,8 @@ struct r600_draw { struct pipe_context *ctx; - struct radeon_state draw; - struct radeon_state vgt; + struct radeon_state *draw; + struct radeon_state *vgt; unsigned mode; unsigned start; unsigned count; @@ -51,7 +51,7 @@ static int r600_draw_common(struct r600_draw *draw) { struct r600_context *rctx = r600_context(draw->ctx); struct r600_screen *rscreen = rctx->screen; - struct radeon_state vs_resource; + struct radeon_state *vs_resource; struct r600_resource *rbuffer; unsigned i, j, offset, format, prim; u32 vgt_dma_index_type, vgt_draw_initiator; @@ -88,10 +88,10 @@ static int r600_draw_common(struct r600_draw *draw) r = r600_pipe_shader_update(draw->ctx, rctx->ps_shader); if (r) return r; - r = radeon_draw_set(&rctx->draw, &rctx->vs_shader->rstate); + r = radeon_draw_set(rctx->draw, 
rctx->vs_shader->rstate); if (r) return r; - r = radeon_draw_set(&rctx->draw, &rctx->ps_shader->rstate); + r = radeon_draw_set(rctx->draw, rctx->ps_shader->rstate); if (r) return r; @@ -101,68 +101,68 @@ static int r600_draw_common(struct r600_draw *draw) rbuffer = (struct r600_resource*)vertex_buffer->buffer; offset = rctx->vertex_elements->elements[i].src_offset + vertex_buffer->buffer_offset; format = r600_translate_colorformat(rctx->vertex_elements->elements[i].src_format); - r = radeon_state_init(&vs_resource, rscreen->rw, R600_VS_RESOURCE_TYPE, R600_VS_RESOURCE + i); - if (r) - return r; - vs_resource.bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); - vs_resource.nbo = 1; - vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD0] = offset; - vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD1] = rbuffer->bo->size - offset; - vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD2] = S_038008_STRIDE(vertex_buffer->stride) | + vs_resource = radeon_state(rscreen->rw, R600_VS_RESOURCE_TYPE, R600_VS_RESOURCE + i); + if (vs_resource == NULL) + return -ENOMEM; + vs_resource->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + vs_resource->nbo = 1; + vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = offset; + vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = rbuffer->bo->size - offset; + vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = S_038008_STRIDE(vertex_buffer->stride) | S_038008_DATA_FORMAT(format); - vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD3] = 0x00000000; - vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD4] = 0x00000000; - vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD5] = 0x00000000; - vs_resource.states[R600_PS_RESOURCE__RESOURCE0_WORD6] = 0xC0000000; - vs_resource.placement[0] = RADEON_GEM_DOMAIN_GTT; - vs_resource.placement[1] = RADEON_GEM_DOMAIN_GTT; - radeon_state_pm4(&vs_resource); - r = radeon_draw_set(&rctx->draw, &vs_resource); + vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = 0x00000000; + 
vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD4] = 0x00000000; + vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD5] = 0x00000000; + vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = 0xC0000000; + vs_resource->placement[0] = RADEON_GEM_DOMAIN_GTT; + vs_resource->placement[1] = RADEON_GEM_DOMAIN_GTT; + r = radeon_draw_set_new(rctx->draw, vs_resource); if (r) return r; } /* FIXME start need to change winsys */ - r = radeon_state_init(&draw->draw, rscreen->rw, R600_DRAW_TYPE, R600_DRAW); - if (r) - return r; - draw->draw.states[R600_DRAW__VGT_NUM_INDICES] = draw->count; - draw->draw.states[R600_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator; + draw->draw = radeon_state(rscreen->rw, R600_DRAW_TYPE, R600_DRAW); + if (draw->draw == NULL) + return -ENOMEM; + draw->draw->states[R600_DRAW__VGT_NUM_INDICES] = draw->count; + draw->draw->states[R600_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator; if (draw->index_buffer) { - rbuffer = (struct r600_resource*)draw->index_buffer; - draw->draw.bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); - draw->draw.placement[0] = RADEON_GEM_DOMAIN_GTT; - draw->draw.placement[1] = RADEON_GEM_DOMAIN_GTT; - draw->draw.nbo = 1; + rbuffer = (struct r600_buffer*)draw->index_buffer; + draw->draw->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + draw->draw->placement[0] = RADEON_GEM_DOMAIN_GTT; + draw->draw->placement[1] = RADEON_GEM_DOMAIN_GTT; + draw->draw->nbo = 1; } - radeon_state_pm4(&draw->draw); - r = radeon_draw_set(&rctx->draw, &draw->draw); + r = radeon_draw_set_new(rctx->draw, draw->draw); if (r) return r; - r = radeon_state_init(&draw->vgt, rscreen->rw, R600_VGT_TYPE, R600_VGT); - if (r) - return r; - draw->vgt.states[R600_VGT__VGT_PRIMITIVE_TYPE] = prim; - draw->vgt.states[R600_VGT__VGT_MAX_VTX_INDX] = 0x00FFFFFF; - draw->vgt.states[R600_VGT__VGT_MIN_VTX_INDX] = 0x00000000; - draw->vgt.states[R600_VGT__VGT_INDX_OFFSET] = draw->start; - draw->vgt.states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX] = 0x00000000; - 
draw->vgt.states[R600_VGT__VGT_DMA_INDEX_TYPE] = vgt_dma_index_type; - draw->vgt.states[R600_VGT__VGT_PRIMITIVEID_EN] = 0x00000000; - draw->vgt.states[R600_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001; - draw->vgt.states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_EN] = 0x00000000; - draw->vgt.states[R600_VGT__VGT_INSTANCE_STEP_RATE_0] = 0x00000000; - draw->vgt.states[R600_VGT__VGT_INSTANCE_STEP_RATE_1] = 0x00000000; - radeon_state_pm4(&draw->vgt); - r = radeon_draw_set(&rctx->draw, &draw->vgt); + draw->vgt = radeon_state(rscreen->rw, R600_VGT_TYPE, R600_VGT); + if (draw->vgt == NULL) + return -ENOMEM; + draw->vgt->states[R600_VGT__VGT_PRIMITIVE_TYPE] = prim; + draw->vgt->states[R600_VGT__VGT_MAX_VTX_INDX] = 0x00FFFFFF; + draw->vgt->states[R600_VGT__VGT_MIN_VTX_INDX] = 0x00000000; + draw->vgt->states[R600_VGT__VGT_INDX_OFFSET] = draw->start; + draw->vgt->states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX] = 0x00000000; + draw->vgt->states[R600_VGT__VGT_DMA_INDEX_TYPE] = vgt_dma_index_type; + draw->vgt->states[R600_VGT__VGT_PRIMITIVEID_EN] = 0x00000000; + draw->vgt->states[R600_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001; + draw->vgt->states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_EN] = 0x00000000; + draw->vgt->states[R600_VGT__VGT_INSTANCE_STEP_RATE_0] = 0x00000000; + draw->vgt->states[R600_VGT__VGT_INSTANCE_STEP_RATE_1] = 0x00000000; + r = radeon_draw_set_new(rctx->draw, draw->vgt); if (r) return r; /* FIXME */ - r = radeon_ctx_set_draw(&rctx->ctx, &rctx->draw); + r = radeon_ctx_set_draw_new(rctx->ctx, rctx->draw); if (r == -EBUSY) { r600_flush(draw->ctx, 0, NULL); - r = radeon_ctx_set_draw(&rctx->ctx, &rctx->draw); + r = radeon_ctx_set_draw_new(rctx->ctx, rctx->draw); } - return r; + if (r) + return r; + rctx->draw = radeon_draw_duplicate(rctx->draw); + return 0; } void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index d925dcbe4b..dc8d4cb315 100644 --- 
a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -130,12 +130,11 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta struct r600_shader *rshader = &rpshader->shader; struct radeon_state *state; unsigned i, tmp; - int r; - r = radeon_state_init(&rpshader->rstate, rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER); - if (r) - return r; - state = &rpshader->rstate; + rpshader->rstate = radeon_state_decref(rpshader->rstate); + state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER); + if (state == NULL) + return -ENOMEM; for (i = 0; i < 10; i++) { state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0; } @@ -146,10 +145,11 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta } state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr); - rpshader->rstate.bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); - rpshader->rstate.bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo); - rpshader->rstate.nbo = 2; - rpshader->rstate.placement[0] = RADEON_GEM_DOMAIN_GTT; + rpshader->rstate = state; + rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); + rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo); + rpshader->rstate->nbo = 2; + rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; return radeon_state_pm4(state); } @@ -159,12 +159,11 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta struct r600_shader *rshader = &rpshader->shader; struct radeon_state *state; unsigned i, tmp, exports_ps, num_cout; - int r; - r = radeon_state_init(&rpshader->rstate, rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER); - if (r) - return r; - state = &rpshader->rstate; + rpshader->rstate = radeon_state_decref(rpshader->rstate); + state = radeon_state(rscreen->rw, 
R600_PS_SHADER_TYPE, R600_PS_SHADER); + if (state == NULL) + return -ENOMEM; for (i = 0; i < rshader->ninput; i++) { tmp = S_028644_SEMANTIC(i); tmp |= S_028644_SEL_CENTROID(1); @@ -190,9 +189,10 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr); state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps; - rpshader->rstate.bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); - rpshader->rstate.nbo = 1; - rpshader->rstate.placement[0] = RADEON_GEM_DOMAIN_GTT; + rpshader->rstate = state; + rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); + rpshader->rstate->nbo = 1; + rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; return radeon_state_pm4(state); } diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index c5e74d1efc..e8871cd748 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -277,10 +277,9 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_context *rctx = r600_context(ctx); unsigned nconstant = 0, i, type, id; - struct radeon_state rstate; + struct radeon_state *rstate; struct pipe_transfer *transfer; u32 *ptr; - int r; switch (shader) { case PIPE_SHADER_VERTEX: @@ -301,16 +300,16 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, if (ptr == NULL) return; for (i = 0; i < nconstant; i++) { - r = radeon_state_init(&rstate, rscreen->rw, type, id + i); - if (r) + rstate = radeon_state(rscreen->rw, type, id + i); + if (rstate == NULL) return; - rstate.states[R600_PS_CONSTANT__SQ_ALU_CONSTANT0_0] = ptr[i * 4 + 0]; - rstate.states[R600_PS_CONSTANT__SQ_ALU_CONSTANT1_0] = ptr[i * 4 + 1]; - rstate.states[R600_PS_CONSTANT__SQ_ALU_CONSTANT2_0] = ptr[i * 4 + 2]; - 
rstate.states[R600_PS_CONSTANT__SQ_ALU_CONSTANT3_0] = ptr[i * 4 + 3]; - if (radeon_state_pm4(&rstate)) + rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT0_0] = ptr[i * 4 + 0]; + rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT1_0] = ptr[i * 4 + 1]; + rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT2_0] = ptr[i * 4 + 2]; + rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT3_0] = ptr[i * 4 + 3]; + if (radeon_state_pm4(rstate)) return; - if (radeon_draw_set(&rctx->draw, &rstate)) + if (radeon_draw_set_new(rctx->draw, rstate)) return; } pipe_buffer_unmap(ctx, buffer, transfer); @@ -521,6 +520,7 @@ struct r600_context_state *r600_context_state_decref(struct r600_context_state * R600_ERR("invalid type %d\n", rstate->type); return NULL; } + radeon_state_decref(rstate->rstate); FREE(rstate); return NULL; } @@ -603,17 +603,16 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne return rstate; } -static int r600_blend(struct r600_context *rctx, struct radeon_state *rstate) +static struct radeon_state *r600_blend(struct r600_context *rctx) { struct r600_screen *rscreen = rctx->screen; + struct radeon_state *rstate; const struct pipe_blend_state *state = &rctx->blend->state.blend; int i; - int r; - - r = radeon_state_init(rstate, rscreen->rw, R600_BLEND_TYPE, R600_BLEND); - if (r) - return r; + rstate = radeon_state(rscreen->rw, R600_BLEND_TYPE, R600_BLEND); + if (rstate == NULL) + return NULL; rstate->states[R600_BLEND__CB_BLEND_RED] = fui(rctx->blend_color.color[0]); rstate->states[R600_BLEND__CB_BLEND_GREEN] = fui(rctx->blend_color.color[1]); rstate->states[R600_BLEND__CB_BLEND_BLUE] = fui(rctx->blend_color.color[2]); @@ -657,27 +656,30 @@ static int r600_blend(struct r600_context *rctx, struct radeon_state *rstate) rstate->states[R600_BLEND__CB_BLEND_CONTROL] = bc; } - return radeon_state_pm4(rstate); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } -static int r600_cb(struct r600_context 
*rctx, struct radeon_state *rstate, int cb) +static struct radeon_state *r600_cb(struct r600_context *rctx, int cb) { struct r600_screen *rscreen = rctx->screen; struct r600_resource_texture *rtex; struct r600_resource *rbuffer; + struct radeon_state *rstate; const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer; unsigned level = state->cbufs[cb]->level; unsigned pitch, slice; unsigned color_info; unsigned format, swap, ntype; - int r; const struct util_format_description *desc; int id = R600_CB0 + cb; - r = radeon_state_init(rstate, rscreen->rw, R600_CB0_TYPE, id); - if (r) - return r; - + rstate = radeon_state(rscreen->rw, R600_CB0_TYPE, id); + if (rstate == NULL) + return NULL; rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; rbuffer = &rtex->resource; rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); @@ -712,25 +714,30 @@ static int r600_cb(struct r600_context *rctx, struct radeon_state *rstate, int c rstate->states[R600_CB0__CB_COLOR0_FRAG] = 0x00000000; rstate->states[R600_CB0__CB_COLOR0_TILE] = 0x00000000; rstate->states[R600_CB0__CB_COLOR0_MASK] = 0x00000000; - return radeon_state_pm4(rstate); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } -static int r600_db(struct r600_context *rctx, struct radeon_state *rstate) +static struct radeon_state *r600_db(struct r600_context *rctx) { struct r600_screen *rscreen = rctx->screen; struct r600_resource_texture *rtex; struct r600_resource *rbuffer; + struct radeon_state *rstate; const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer; unsigned level = state->cbufs[0]->level; unsigned pitch, slice, format; - int r; - - r = radeon_state_init(rstate, rscreen->rw, R600_DB_TYPE, R600_DB); - if (r) - return r; if (state->zsbuf == NULL) - return 0; + return NULL; + + rstate = radeon_state(rscreen->rw, R600_DB_TYPE, R600_DB); + if (rstate == NULL) + return NULL; + rtex = (struct 
r600_resource_texture*)state->zsbuf->texture; rbuffer = &rtex->resource; rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); @@ -747,22 +754,23 @@ static int r600_db(struct r600_context *rctx, struct radeon_state *rstate) rstate->states[R600_DB__DB_PREFETCH_LIMIT] = (state->zsbuf->height / 8) -1; rstate->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice); - return radeon_state_pm4(rstate); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } -static int r600_rasterizer(struct r600_context *rctx, struct radeon_state *rstate) +static struct radeon_state *r600_rasterizer(struct r600_context *rctx) { const struct pipe_rasterizer_state *state = &rctx->rasterizer->state.rasterizer; const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; struct r600_screen *rscreen = rctx->screen; + struct radeon_state *rstate; float offset_units = 0, offset_scale = 0; char depth = 0; unsigned offset_db_fmt_cntl = 0; - int r; - r = radeon_state_init(rstate, rscreen->rw, R600_RASTERIZER_TYPE, R600_RASTERIZER); - if (r) - return r; if (fb->zsbuf) { offset_units = state->offset_units; offset_scale = state->offset_scale * 12.0f; @@ -783,12 +791,15 @@ static int r600_rasterizer(struct r600_context *rctx, struct radeon_state *rstat break; default: R600_ERR("unsupported %d\n", fb->zsbuf->texture->format); - return -EINVAL; + return NULL; } } offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth); rctx->flat_shade = state->flatshade; + rstate = radeon_state(rscreen->rw, R600_RASTERIZER_TYPE, R600_RASTERIZER); + if (rstate == NULL) + return NULL; rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000001; rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0x00000000; rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = 0x00080000 | @@ -816,21 +827,25 @@ static int r600_rasterizer(struct r600_context *rctx, struct radeon_state *rstat 
rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET] = fui(offset_units); rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE] = fui(offset_scale); rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET] = fui(offset_units); - return radeon_state_pm4(rstate); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } -static int r600_scissor(struct r600_context *rctx, struct radeon_state *rstate) +static struct radeon_state *r600_scissor(struct r600_context *rctx) { const struct pipe_scissor_state *state = &rctx->scissor->state.scissor; struct r600_screen *rscreen = rctx->screen; + struct radeon_state *rstate; u32 tl, br; - int r; - r = radeon_state_init(rstate, rscreen->rw, R600_SCISSOR_TYPE, R600_SCISSOR); - if (r) - return r; tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) | S_028240_WINDOW_OFFSET_DISABLE(1); br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); + rstate = radeon_state(rscreen->rw, R600_SCISSOR_TYPE, R600_SCISSOR); + if (rstate == NULL) + return NULL; rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = tl; rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = br; rstate->states[R600_SCISSOR__PA_SC_WINDOW_OFFSET] = 0x00000000; @@ -850,18 +865,22 @@ static int r600_scissor(struct r600_context *rctx, struct radeon_state *rstate) rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = br; rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = tl; rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = br; - return radeon_state_pm4(rstate); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } -static int r600_viewport(struct r600_context *rctx, struct radeon_state *rstate) +static struct radeon_state *r600_viewport(struct r600_context *rctx) { const struct pipe_viewport_state *state = &rctx->viewport->state.viewport; struct r600_screen *rscreen = rctx->screen; - int r; + struct radeon_state *rstate; - 
r = radeon_state_init(rstate, rscreen->rw, R600_VIEWPORT_TYPE, R600_VIEWPORT); - if (r) - return r; + rstate = radeon_state(rscreen->rw, R600_VIEWPORT_TYPE, R600_VIEWPORT); + if (rstate == NULL) + return NULL; rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMIN_0] = 0x00000000; rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui(state->scale[0]); @@ -871,22 +890,27 @@ static int r600_viewport(struct r600_context *rctx, struct radeon_state *rstate) rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui(state->translate[1]); rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = fui(state->translate[2]); rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; - return radeon_state_pm4(rstate); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } -static int r600_dsa(struct r600_context *rctx, struct radeon_state *rstate) +static struct radeon_state *r600_dsa(struct r600_context *rctx) { const struct pipe_depth_stencil_alpha_state *state = &rctx->dsa->state.dsa; const struct pipe_stencil_ref *stencil_ref = &rctx->stencil_ref->state.stencil_ref; struct r600_screen *rscreen = rctx->screen; unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control; unsigned stencil_ref_mask, stencil_ref_mask_bf; - int r, i; struct r600_shader *rshader = &rctx->ps_shader->shader; + struct radeon_state *rstate; + int i; - r = radeon_state_init(rstate, rscreen->rw, R600_DSA_TYPE, R600_DSA); - if (r) - return r; + rstate = radeon_state(rscreen->rw, R600_DSA_TYPE, R600_DSA); + if (rstate == NULL) + return NULL; db_shader_control = 0x210; for (i = 0; i < rshader->noutput; i++) { @@ -898,8 +922,8 @@ static int r600_dsa(struct r600_context *rctx, struct radeon_state *rstate) db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | S_028800_Z_WRITE_ENABLE(state->depth.writemask) | S_028800_ZFUNC(state->depth.func); - /* set stencil enable */ + if 
(state->stencil[0].enabled) { db_depth_control |= S_028800_STENCIL_ENABLE(1); db_depth_control |= S_028800_STENCILFUNC(r600_translate_ds_func(state->stencil[0].func)); @@ -946,7 +970,11 @@ static int r600_dsa(struct r600_context *rctx, struct radeon_state *rstate) rstate->states[R600_DSA__DB_SRESULTS_COMPARE_STATE1] = 0x00000000; rstate->states[R600_DSA__DB_PRELOAD_CONTROL] = 0x00000000; rstate->states[R600_DSA__DB_ALPHA_TO_MASK] = 0x0000AA00; - return radeon_state_pm4(rstate); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } static inline unsigned r600_tex_wrap(unsigned wrap) @@ -1024,15 +1052,16 @@ static INLINE u32 S_FIXED(float value, u32 frac_bits) return value * (1 << frac_bits); } -static int r600_sampler(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_sampler_state *state, unsigned id) +static struct radeon_state *r600_sampler(struct r600_context *rctx, + const struct pipe_sampler_state *state, + unsigned id) { struct r600_screen *rscreen = rctx->screen; - int r; + struct radeon_state *rstate; - r = radeon_state_init(rstate, rscreen->rw, R600_PS_SAMPLER_TYPE, id); - if (r) - return r; + rstate = radeon_state(rscreen->rw, R600_PS_SAMPLER_TYPE, id); + if (rstate == NULL) + return NULL; rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0] = S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | @@ -1047,7 +1076,11 @@ static int r600_sampler(struct r600_context *rctx, struct radeon_state *rstate, S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)); rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0] = S_03C008_TYPE(1); - return radeon_state_pm4(rstate); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } static inline unsigned r600_tex_swizzle(unsigned swizzle) @@ -1097,26 +1130,28 @@ static inline unsigned 
r600_tex_dim(unsigned dim) } } -static int r600_resource(struct r600_context *rctx, struct radeon_state *rstate, - const struct pipe_sampler_view *view, unsigned id) +static struct radeon_state *r600_resource(struct r600_context *rctx, + const struct pipe_sampler_view *view, + unsigned id) { struct r600_screen *rscreen = rctx->screen; const struct util_format_description *desc; struct r600_resource_texture *tmp; struct r600_resource *rbuffer; + struct radeon_state *rstate; unsigned format; - int r; - r = radeon_state_init(rstate, rscreen->rw, R600_PS_RESOURCE_TYPE, id); - if (r) - return r; format = r600_translate_colorformat(view->texture->format); if (format == ~0) - return -EINVAL; + return NULL; desc = util_format_description(view->texture->format); if (desc == NULL) { R600_ERR("unknow format %d\n", view->texture->format); - return -EINVAL; + return NULL; + } + rstate = radeon_state(rscreen->rw, R600_PS_RESOURCE_TYPE, id); + if (rstate == NULL) { + return NULL; } tmp = (struct r600_resource_texture*)view->texture; rbuffer = &tmp->resource; @@ -1159,16 +1194,21 @@ static int r600_resource(struct r600_context *rctx, struct radeon_state *rstate, S_038014_LAST_ARRAY(0); rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE); - return radeon_state_pm4(rstate); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } -static int r600_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate) +static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) { struct r600_screen *rscreen = rctx->screen; + struct radeon_state *rstate; const struct pipe_blend_state *pbs = &rctx->blend->state.blend; int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs; uint32_t color_control, target_mask, shader_mask; - int i, r; + int i; target_mask = 0; shader_mask = 0; @@ -1190,10 +1230,7 @@ static int r600_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate) 
target_mask |= (pbs->rt[i].colormask << (4 * i)); } - r = radeon_state_init(rstate, rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); - if (r) - return r; - + rstate = radeon_state(rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = shader_mask; rstate->states[R600_CB_CNTL__CB_TARGET_MASK] = target_mask; rstate->states[R600_CB_CNTL__CB_COLOR_CONTROL] = color_control; @@ -1205,7 +1242,11 @@ static int r600_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate) rstate->states[R600_CB_CNTL__CB_CLRCMP_DST] = 0x000000FF; rstate->states[R600_CB_CNTL__CB_CLRCMP_MSK] = 0xFFFFFFFF; rstate->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF; - return radeon_state_pm4(rstate); + if (radeon_state_pm4(rstate)) { + radeon_state_decref(rstate); + return NULL; + } + return rstate; } int r600_context_hw_states(struct r600_context *rctx) @@ -1217,77 +1258,97 @@ int r600_context_hw_states(struct r600_context *rctx) /* free previous TODO determine what need to be updated, what * doesn't */ - memset(&rctx->hw_states, 0, sizeof(struct r600_context_hw_states)); + //radeon_state_decref(rctx->hw_states.config); + rctx->hw_states.cb_cntl = radeon_state_decref(rctx->hw_states.cb_cntl); + rctx->hw_states.db = radeon_state_decref(rctx->hw_states.db); + rctx->hw_states.rasterizer = radeon_state_decref(rctx->hw_states.rasterizer); + rctx->hw_states.scissor = radeon_state_decref(rctx->hw_states.scissor); + rctx->hw_states.dsa = radeon_state_decref(rctx->hw_states.dsa); + rctx->hw_states.blend = radeon_state_decref(rctx->hw_states.blend); + rctx->hw_states.viewport = radeon_state_decref(rctx->hw_states.viewport); + for (i = 0; i < 8; i++) { + rctx->hw_states.cb[i] = radeon_state_decref(rctx->hw_states.cb[i]); + } + for (i = 0; i < rctx->hw_states.ps_nresource; i++) { + radeon_state_decref(rctx->hw_states.ps_resource[i]); + rctx->hw_states.ps_resource[i] = NULL; + } + rctx->hw_states.ps_nresource = 0; + for (i = 0; i < rctx->hw_states.ps_nsampler; i++) 
{ + radeon_state_decref(rctx->hw_states.ps_sampler[i]); + rctx->hw_states.ps_sampler[i] = NULL; + } + rctx->hw_states.ps_nsampler = 0; /* build new states */ - rctx->hw_states.config = rctx->config; - r600_rasterizer(rctx, &rctx->hw_states.rasterizer); - r600_scissor(rctx, &rctx->hw_states.scissor); - r600_dsa(rctx, &rctx->hw_states.dsa); - r600_blend(rctx, &rctx->hw_states.blend); - r600_viewport(rctx, &rctx->hw_states.viewport); - for (i = 0; i < nr_cbufs; i++) - r600_cb(rctx, &rctx->hw_states.cb[i], i); - r600_db(rctx, &rctx->hw_states.db); - r600_cb_cntl(rctx, &rctx->hw_states.cb_cntl); + rctx->hw_states.rasterizer = r600_rasterizer(rctx); + rctx->hw_states.scissor = r600_scissor(rctx); + rctx->hw_states.dsa = r600_dsa(rctx); + rctx->hw_states.blend = r600_blend(rctx); + rctx->hw_states.viewport = r600_viewport(rctx); + for (i = 0; i < nr_cbufs; i++) { + rctx->hw_states.cb[i] = r600_cb(rctx, i); + } + rctx->hw_states.db = r600_db(rctx); + rctx->hw_states.cb_cntl = r600_cb_cntl(rctx); for (i = 0; i < rctx->ps_nsampler; i++) { if (rctx->ps_sampler[i]) { - r600_sampler(rctx, &rctx->hw_states.ps_sampler[i], - &rctx->ps_sampler[i]->state.sampler, - R600_PS_SAMPLER + i); + rctx->hw_states.ps_sampler[i] = r600_sampler(rctx, + &rctx->ps_sampler[i]->state.sampler, + R600_PS_SAMPLER + i); } } rctx->hw_states.ps_nsampler = rctx->ps_nsampler; for (i = 0; i < rctx->ps_nsampler_view; i++) { if (rctx->ps_sampler_view[i]) { - r600_resource(rctx, &rctx->hw_states.ps_resource[i], - &rctx->ps_sampler_view[i]->state.sampler_view, - R600_PS_RESOURCE + i); + rctx->hw_states.ps_resource[i] = r600_resource(rctx, + &rctx->ps_sampler_view[i]->state.sampler_view, + R600_PS_RESOURCE + i); } } rctx->hw_states.ps_nresource = rctx->ps_nsampler_view; /* bind states */ - r = radeon_draw_set(&rctx->draw, &rctx->hw_states.db); + r = radeon_draw_set(rctx->draw, rctx->hw_states.db); if (r) return r; - r = radeon_draw_set(&rctx->draw, &rctx->hw_states.rasterizer); + r = radeon_draw_set(rctx->draw, 
rctx->hw_states.rasterizer); if (r) return r; - r = radeon_draw_set(&rctx->draw, &rctx->hw_states.scissor); + r = radeon_draw_set(rctx->draw, rctx->hw_states.scissor); if (r) return r; - r = radeon_draw_set(&rctx->draw, &rctx->hw_states.dsa); + r = radeon_draw_set(rctx->draw, rctx->hw_states.dsa); if (r) return r; - r = radeon_draw_set(&rctx->draw, &rctx->hw_states.blend); + r = radeon_draw_set(rctx->draw, rctx->hw_states.blend); if (r) return r; - r = radeon_draw_set(&rctx->draw, &rctx->hw_states.viewport); + r = radeon_draw_set(rctx->draw, rctx->hw_states.viewport); if (r) return r; for (i = 0; i < nr_cbufs; i++) { - r = radeon_draw_set(&rctx->draw, &rctx->hw_states.cb[i]); + r = radeon_draw_set(rctx->draw, rctx->hw_states.cb[i]); if (r) return r; } - r = radeon_draw_set(&rctx->draw, &rctx->hw_states.config); + r = radeon_draw_set(rctx->draw, rctx->hw_states.config); if (r) return r; - r = radeon_draw_set(&rctx->draw, &rctx->hw_states.cb_cntl); + r = radeon_draw_set(rctx->draw, rctx->hw_states.cb_cntl); if (r) return r; for (i = 0; i < rctx->hw_states.ps_nresource; i++) { - if (rctx->hw_states.ps_resource[i].valid) { - r = radeon_draw_set(&rctx->draw, &rctx->hw_states.ps_resource[i]); + if (rctx->hw_states.ps_resource[i]) { + r = radeon_draw_set(rctx->draw, rctx->hw_states.ps_resource[i]); if (r) return r; } } for (i = 0; i < rctx->hw_states.ps_nsampler; i++) { - if (rctx->hw_states.ps_sampler[i].valid) { - r = radeon_draw_set(&rctx->draw, &rctx->hw_states.ps_sampler[i]); + if (rctx->hw_states.ps_sampler[i]) { + r = radeon_draw_set(rctx->draw, rctx->hw_states.ps_sampler[i]); if (r) return r; } diff --git a/src/gallium/drivers/r600/radeon.h b/src/gallium/drivers/r600/radeon.h index 709ef8a85a..3a8405f9b4 100644 --- a/src/gallium/drivers/r600/radeon.h +++ b/src/gallium/drivers/r600/radeon.h @@ -103,17 +103,17 @@ int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo); */ struct radeon_state { struct radeon *radeon; - unsigned valid; + unsigned refcount; 
unsigned type; unsigned id; unsigned nstates; - u32 states[64]; + u32 *states; unsigned npm4; unsigned cpm4; u32 pm4_crc; - u32 pm4[128]; + u32 *pm4; u32 nimmd; - u32 immd[64]; + u32 *immd; unsigned nbo; struct radeon_bo *bo[4]; unsigned nreloc; @@ -123,51 +123,35 @@ struct radeon_state { unsigned bo_dirty[4]; }; -int radeon_state_init(struct radeon_state *state, struct radeon *radeon, u32 type, u32 id); +struct radeon_state *radeon_state(struct radeon *radeon, u32 type, u32 id); +struct radeon_state *radeon_state_incref(struct radeon_state *state); +struct radeon_state *radeon_state_decref(struct radeon_state *state); int radeon_state_pm4(struct radeon_state *state); /* * draw functions */ struct radeon_draw { + unsigned refcount; struct radeon *radeon; unsigned nstate; - struct radeon_state state[1273]; + struct radeon_state **state; unsigned cpm4; }; -int radeon_draw_init(struct radeon_draw *draw, struct radeon *radeon); +struct radeon_draw *radeon_draw(struct radeon *radeon); +struct radeon_draw *radeon_draw_duplicate(struct radeon_draw *draw); +struct radeon_draw *radeon_draw_incref(struct radeon_draw *draw); +struct radeon_draw *radeon_draw_decref(struct radeon_draw *draw); int radeon_draw_set(struct radeon_draw *draw, struct radeon_state *state); +int radeon_draw_set_new(struct radeon_draw *draw, struct radeon_state *state); int radeon_draw_check(struct radeon_draw *draw); -/* - * Context - */ -#pragma pack(1) -struct radeon_cs_reloc { - uint32_t handle; - uint32_t read_domain; - uint32_t write_domain; - uint32_t flags; -}; -#pragma pack() - -struct radeon_ctx { - struct radeon *radeon; - u32 *pm4; - u32 cpm4; - u32 draw_cpm4; - unsigned id; - unsigned nreloc; - struct radeon_cs_reloc reloc[2048]; - unsigned nbo; - struct radeon_bo *bo[2048]; - unsigned ndraw; - struct radeon_draw draw[128]; -}; - -int radeon_ctx_init(struct radeon_ctx *ctx, struct radeon *radeon); +struct radeon_ctx *radeon_ctx(struct radeon *radeon); +struct radeon_ctx 
*radeon_ctx_decref(struct radeon_ctx *ctx); +struct radeon_ctx *radeon_ctx_incref(struct radeon_ctx *ctx); int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw); +int radeon_ctx_set_draw_new(struct radeon_ctx *ctx, struct radeon_draw *draw); int radeon_ctx_pm4(struct radeon_ctx *ctx); int radeon_ctx_submit(struct radeon_ctx *ctx); void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file); diff --git a/src/gallium/targets/dri-r600/Makefile b/src/gallium/targets/dri-r600/Makefile index 9c8b4ab252..932303d194 100644 --- a/src/gallium/targets/dri-r600/Makefile +++ b/src/gallium/targets/dri-r600/Makefile @@ -4,12 +4,12 @@ include $(TOP)/configs/current LIBNAME = r600_dri.so PIPE_DRIVERS = \ - $(TOP)/src/gallium/drivers/r600/libr600.a \ $(TOP)/src/gallium/state_trackers/dri/drm/libdridrm.a \ $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ - $(TOP)/src/gallium/drivers/rbug/librbug.a + $(TOP)/src/gallium/drivers/rbug/librbug.a \ + $(TOP)/src/gallium/drivers/r600/libr600.a C_SOURCES = \ target.c \ diff --git a/src/gallium/winsys/r600/drm/radeon.c b/src/gallium/winsys/r600/drm/radeon.c index 24d821d5cf..7e65669806 100644 --- a/src/gallium/winsys/r600/drm/radeon.c +++ b/src/gallium/winsys/r600/drm/radeon.c @@ -43,6 +43,16 @@ static int radeon_get_device(struct radeon *radeon) return r; } +/* symbol missing drove me crazy hack to get symbol exported */ +static void fake(void) +{ + struct radeon_ctx *ctx; + struct radeon_draw *draw; + + ctx = radeon_ctx(NULL); + draw = radeon_draw(NULL); +} + struct radeon *radeon_new(int fd, unsigned device) { struct radeon *radeon; @@ -50,6 +60,7 @@ struct radeon *radeon_new(int fd, unsigned device) radeon = calloc(1, sizeof(*radeon)); if (radeon == NULL) { + fake(); return NULL; } radeon->fd = fd; diff --git a/src/gallium/winsys/r600/drm/radeon_ctx.c b/src/gallium/winsys/r600/drm/radeon_ctx.c index 
af270d5d20..6b0eba0b28 100644 --- a/src/gallium/winsys/r600/drm/radeon_ctx.c +++ b/src/gallium/winsys/r600/drm/radeon_ctx.c @@ -32,8 +32,13 @@ int radeon_ctx_set_bo_new(struct radeon_ctx *ctx, struct radeon_bo *bo) { - if (ctx->nbo >= 2048) - return -EBUSY; + void *ptr; + + ptr = realloc(ctx->bo, sizeof(struct radeon_bo) * (ctx->nbo + 1)); + if (ptr == NULL) { + return -ENOMEM; + } + ctx->bo = ptr; ctx->bo[ctx->nbo] = bo; ctx->nbo++; return 0; @@ -71,26 +76,49 @@ void radeon_ctx_get_placement(struct radeon_ctx *ctx, unsigned reloc, u32 *place } } -static void radeon_ctx_clear(struct radeon_ctx *ctx) +struct radeon_ctx *radeon_ctx(struct radeon *radeon) { - ctx->draw_cpm4 = 0; - ctx->cpm4 = 0; - ctx->ndraw = 0; - ctx->nbo = 0; - ctx->nreloc = 0; + struct radeon_ctx *ctx; + + if (radeon == NULL) + return NULL; + ctx = calloc(1, sizeof(*ctx)); + if (ctx == NULL) + return NULL; + ctx->radeon = radeon_incref(radeon); + return ctx; } -int radeon_ctx_init(struct radeon_ctx *ctx, struct radeon *radeon) +struct radeon_ctx *radeon_ctx_incref(struct radeon_ctx *ctx) { - memset(ctx, 0, sizeof(struct radeon_ctx)); - ctx->radeon = radeon_incref(radeon); - radeon_ctx_clear(ctx); + ctx->refcount++; + return ctx; +} + +struct radeon_ctx *radeon_ctx_decref(struct radeon_ctx *ctx) +{ + unsigned i; + + if (ctx == NULL) + return NULL; + if (--ctx->refcount > 0) { + return NULL; + } + + for (i = 0; i < ctx->ndraw; i++) { + ctx->draw[i] = radeon_draw_decref(ctx->draw[i]); + } + for (i = 0; i < ctx->nbo; i++) { + ctx->bo[i] = radeon_bo_decref(ctx->radeon, ctx->bo[i]); + } + ctx->radeon = radeon_decref(ctx->radeon); + free(ctx->draw); + free(ctx->bo); free(ctx->pm4); - ctx->cpm4 = 0; - ctx->pm4 = malloc(64 * 1024); - if (ctx->pm4 == NULL) - return -ENOMEM; - return 0; + free(ctx->reloc); + memset(ctx, 0, sizeof(*ctx)); + free(ctx); + return NULL; } static int radeon_ctx_state_bo(struct radeon_ctx *ctx, struct radeon_state *state) @@ -115,6 +143,7 @@ static int radeon_ctx_state_bo(struct 
radeon_ctx *ctx, struct radeon_state *stat return 0; } + int radeon_ctx_submit(struct radeon_ctx *ctx) { struct drm_radeon_cs drmib; @@ -141,7 +170,6 @@ int radeon_ctx_submit(struct radeon_ctx *ctx) r = drmCommandWriteRead(ctx->radeon->fd, DRM_RADEON_CS, &drmib, sizeof(struct drm_radeon_cs)); #endif - radeon_ctx_clear(ctx); return r; } @@ -149,6 +177,7 @@ static int radeon_ctx_reloc(struct radeon_ctx *ctx, struct radeon_bo *bo, unsigned id, unsigned *placement) { unsigned i; + struct radeon_cs_reloc *ptr; for (i = 0; i < ctx->nreloc; i++) { if (ctx->reloc[i].handle == bo->handle) { @@ -156,12 +185,14 @@ static int radeon_ctx_reloc(struct radeon_ctx *ctx, struct radeon_bo *bo, return 0; } } - if (ctx->nreloc >= 2048) - return -EINVAL; - ctx->reloc[ctx->nreloc].handle = bo->handle; - ctx->reloc[ctx->nreloc].read_domain = placement[0] | placement [1]; - ctx->reloc[ctx->nreloc].write_domain = placement[0] | placement [1]; - ctx->reloc[ctx->nreloc].flags = 0; + ptr = realloc(ctx->reloc, sizeof(struct radeon_cs_reloc) * (ctx->nreloc + 1)); + if (ptr == NULL) + return -ENOMEM; + ctx->reloc = ptr; + ptr[ctx->nreloc].handle = bo->handle; + ptr[ctx->nreloc].read_domain = placement[0] | placement [1]; + ptr[ctx->nreloc].write_domain = placement[0] | placement [1]; + ptr[ctx->nreloc].flags = 0; ctx->pm4[id] = ctx->nreloc * sizeof(struct radeon_cs_reloc) / 4; ctx->nreloc++; return 0; @@ -190,13 +221,21 @@ static int radeon_ctx_state_schedule(struct radeon_ctx *ctx, struct radeon_state return 0; } -int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw) +int radeon_ctx_set_draw_new(struct radeon_ctx *ctx, struct radeon_draw *draw) { - unsigned cpm4, i; + struct radeon_draw *pdraw = NULL; + struct radeon_draw **ndraw; + struct radeon_state *nstate, *ostate; + unsigned cpm4, i, cstate; + void *tmp; int r = 0; + ndraw = realloc(ctx->draw, sizeof(void*) * (ctx->ndraw + 1)); + if (ndraw == NULL) + return -ENOMEM; + ctx->draw = ndraw; for (i = 0; i < draw->nstate; 
i++) { - r = radeon_ctx_state_bo(ctx, &draw->state[i]); + r = radeon_ctx_state_bo(ctx, draw->state[i]); if (r) return r; } @@ -208,48 +247,69 @@ int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw) __func__, draw->cpm4, RADEON_CTX_MAX_PM4); return -EINVAL; } - ctx->draw[ctx->ndraw] = *draw; - for (i = 0, cpm4 = 0; i < draw->nstate - 1; i++) { - ctx->draw[ctx->ndraw].state[i].valid &= ~2; - if (ctx->draw[ctx->ndraw].state[i].valid) { - if (ctx->ndraw > 1 && ctx->draw[ctx->ndraw - 1].state[i].valid) { - if (ctx->draw[ctx->ndraw - 1].state[i].pm4_crc == draw->state[i].pm4_crc) - continue; + tmp = realloc(ctx->state, (ctx->nstate + draw->nstate) * sizeof(void*)); + if (tmp == NULL) + return -ENOMEM; + ctx->state = tmp; + pdraw = ctx->cdraw; + for (i = 0, cpm4 = 0, cstate = ctx->nstate; i < draw->nstate - 1; i++) { + nstate = draw->state[i]; + if (nstate) { + if (pdraw && pdraw->state[i]) { + ostate = pdraw->state[i]; + if (ostate->pm4_crc != nstate->pm4_crc) { + ctx->state[cstate++] = nstate; + cpm4 += nstate->cpm4; + } + } else { + ctx->state[cstate++] = nstate; + cpm4 += nstate->cpm4; } - ctx->draw[ctx->ndraw].state[i].valid |= 2; - cpm4 += ctx->draw[ctx->ndraw].state[i].cpm4; } } /* The last state is the draw state always add it */ - if (!draw->state[i].valid) { + if (draw->state[i] == NULL) { fprintf(stderr, "%s no draw command\n", __func__); return -EINVAL; } - ctx->draw[ctx->ndraw].state[i].valid |= 2; - cpm4 += ctx->draw[ctx->ndraw].state[i].cpm4; + ctx->state[cstate++] = draw->state[i]; + cpm4 += draw->state[i]->cpm4; if ((ctx->draw_cpm4 + cpm4) > RADEON_CTX_MAX_PM4) { /* need to flush */ return -EBUSY; } ctx->draw_cpm4 += cpm4; - ctx->ndraw++; + ctx->nstate = cstate; + ctx->draw[ctx->ndraw++] = draw; + ctx->cdraw = draw; return 0; } +int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw) +{ + int r; + + radeon_draw_incref(draw); + r = radeon_ctx_set_draw_new(ctx, draw); + if (r) + radeon_draw_decref(draw); + return r; +} 
+ int radeon_ctx_pm4(struct radeon_ctx *ctx) { - unsigned i, j, c; + unsigned i; int r; - for (i = 0, c = 0, ctx->id = 0; i < ctx->ndraw; i++) { - for (j = 0; j < ctx->draw[i].nstate; j++) { - if (ctx->draw[i].state[j].valid & 2) { - r = radeon_ctx_state_schedule(ctx, &ctx->draw[i].state[j]); - if (r) - return r; - c += ctx->draw[i].state[j].cpm4; - } - } + free(ctx->pm4); + ctx->cpm4 = 0; + ctx->pm4 = malloc(ctx->draw_cpm4 * 4); + if (ctx->pm4 == NULL) + return -EINVAL; + for (i = 0, ctx->id = 0; i < ctx->nstate; i++) { + r = radeon_ctx_state_schedule(ctx, ctx->state[i]); + if (r) + return r; } if (ctx->id != ctx->draw_cpm4) { fprintf(stderr, "%s miss predicted pm4 size %d for %d\n", diff --git a/src/gallium/winsys/r600/drm/radeon_draw.c b/src/gallium/winsys/r600/drm/radeon_draw.c index 53699eb0b1..4413ed79fb 100644 --- a/src/gallium/winsys/r600/drm/radeon_draw.c +++ b/src/gallium/winsys/r600/drm/radeon_draw.c @@ -31,33 +31,111 @@ /* * draw functions */ -int radeon_draw_init(struct radeon_draw *draw, struct radeon *radeon) +struct radeon_draw *radeon_draw(struct radeon *radeon) { - memset(draw, 0, sizeof(struct radeon_draw)); + struct radeon_draw *draw; + + draw = calloc(1, sizeof(*draw)); + if (draw == NULL) + return NULL; draw->nstate = radeon->nstate; draw->radeon = radeon; - return 0; + draw->refcount = 1; + draw->state = calloc(1, sizeof(void*) * draw->nstate); + if (draw->state == NULL) { + free(draw); + return NULL; + } + return draw; } -int radeon_draw_set(struct radeon_draw *draw, struct radeon_state *state) +struct radeon_draw *radeon_draw_incref(struct radeon_draw *draw) +{ + draw->refcount++; + return draw; +} + +struct radeon_draw *radeon_draw_decref(struct radeon_draw *draw) +{ + unsigned i; + + if (draw == NULL) + return NULL; + if (--draw->refcount > 0) + return NULL; + for (i = 0; i < draw->nstate; i++) { + draw->state[i] = radeon_state_decref(draw->state[i]); + } + free(draw->state); + memset(draw, 0, sizeof(*draw)); + free(draw); + return NULL; 
+} + +int radeon_draw_set_new(struct radeon_draw *draw, struct radeon_state *state) { if (state == NULL) return 0; if (state->type >= draw->radeon->ntype) return -EINVAL; - draw->state[state->id] = *state; + draw->state[state->id] = radeon_state_decref(draw->state[state->id]); + draw->state[state->id] = state; return 0; } +int radeon_draw_set(struct radeon_draw *draw, struct radeon_state *state) +{ + if (state == NULL) + return 0; + radeon_state_incref(state); + return radeon_draw_set_new(draw, state); +} + int radeon_draw_check(struct radeon_draw *draw) { unsigned i; int r; + r = radeon_draw_pm4(draw); + if (r) + return r; for (i = 0, draw->cpm4 = 0; i < draw->nstate; i++) { - if (draw->state[i].valid) { - draw->cpm4 += draw->state[i].cpm4; + if (draw->state[i]) { + draw->cpm4 += draw->state[i]->cpm4; } } return 0; } + +struct radeon_draw *radeon_draw_duplicate(struct radeon_draw *draw) +{ + struct radeon_draw *ndraw; + unsigned i; + + if (draw == NULL) + return NULL; + ndraw = radeon_draw(draw->radeon); + if (ndraw == NULL) { + return NULL; + } + for (i = 0; i < draw->nstate; i++) { + if (radeon_draw_set(ndraw, draw->state[i])) { + radeon_draw_decref(ndraw); + return NULL; + } + } + return ndraw; +} + +int radeon_draw_pm4(struct radeon_draw *draw) +{ + unsigned i; + int r; + + for (i = 0; i < draw->nstate; i++) { + r = radeon_state_pm4(draw->state[i]); + if (r) + return r; + } + return 0; +} diff --git a/src/gallium/winsys/r600/drm/radeon_priv.h b/src/gallium/winsys/r600/drm/radeon_priv.h index 80392cda96..b91421f438 100644 --- a/src/gallium/winsys/r600/drm/radeon_priv.h +++ b/src/gallium/winsys/r600/drm/radeon_priv.h @@ -24,6 +24,7 @@ #include "radeon.h" struct radeon; +struct radeon_ctx; /* * radeon functions @@ -70,6 +71,34 @@ extern unsigned radeon_type_from_id(struct radeon *radeon, unsigned id); /* * radeon context functions */ +#pragma pack(1) +struct radeon_cs_reloc { + uint32_t handle; + uint32_t read_domain; + uint32_t write_domain; + uint32_t flags; 
+}; +#pragma pack() + +struct radeon_ctx { + int refcount; + struct radeon *radeon; + u32 *pm4; + u32 cpm4; + u32 draw_cpm4; + unsigned id; + unsigned next_id; + unsigned nreloc; + struct radeon_cs_reloc *reloc; + unsigned nbo; + struct radeon_bo **bo; + unsigned ndraw; + struct radeon_draw *cdraw; + struct radeon_draw **draw; + unsigned nstate; + struct radeon_state **state; +}; + int radeon_ctx_set_bo_new(struct radeon_ctx *ctx, struct radeon_bo *bo); struct radeon_bo *radeon_ctx_get_bo(struct radeon_ctx *ctx, unsigned reloc); void radeon_ctx_get_placement(struct radeon_ctx *ctx, unsigned reloc, u32 *placement); diff --git a/src/gallium/winsys/r600/drm/radeon_state.c b/src/gallium/winsys/r600/drm/radeon_state.c index d7cd1d7a94..308288557a 100644 --- a/src/gallium/winsys/r600/drm/radeon_state.c +++ b/src/gallium/winsys/r600/drm/radeon_state.c @@ -32,23 +32,82 @@ /* * state core functions */ -int radeon_state_init(struct radeon_state *state, struct radeon *radeon, u32 type, u32 id) +struct radeon_state *radeon_state(struct radeon *radeon, u32 type, u32 id) { + struct radeon_state *state; + if (type > radeon->ntype) { fprintf(stderr, "%s invalid type %d\n", __func__, type); - return -EINVAL; + return NULL; } if (id > radeon->nstate) { fprintf(stderr, "%s invalid state id %d\n", __func__, id); - return -EINVAL; + return NULL; } - memset(state, 0, sizeof(struct radeon_state)); + state = calloc(1, sizeof(*state)); + if (state == NULL) + return NULL; state->radeon = radeon; state->type = type; state->id = id; + state->refcount = 1; state->npm4 = radeon->type[type].npm4; state->nstates = radeon->type[type].nstates; - return 0; + state->states = calloc(1, state->nstates * 4); + state->pm4 = calloc(1, radeon->type[type].npm4 * 4); + if (state->states == NULL || state->pm4 == NULL) { + radeon_state_decref(state); + return NULL; + } + return state; +} + +struct radeon_state *radeon_state_duplicate(struct radeon_state *state) +{ + struct radeon_state *nstate = 
radeon_state(state->radeon, state->type, state->id); + unsigned i; + + if (state == NULL) + return NULL; + nstate->cpm4 = state->cpm4; + nstate->nbo = state->nbo; + nstate->nreloc = state->nreloc; + memcpy(nstate->states, state->states, state->nstates * 4); + memcpy(nstate->pm4, state->pm4, state->npm4 * 4); + memcpy(nstate->placement, state->placement, 8 * 4); + memcpy(nstate->reloc_pm4_id, state->reloc_pm4_id, 8 * 4); + memcpy(nstate->reloc_bo_id, state->reloc_bo_id, 8 * 4); + memcpy(nstate->bo_dirty, state->bo_dirty, 4 * 4); + for (i = 0; i < state->nbo; i++) { + nstate->bo[i] = radeon_bo_incref(state->radeon, state->bo[i]); + } + return nstate; +} + +struct radeon_state *radeon_state_incref(struct radeon_state *state) +{ + state->refcount++; + return state; +} + +struct radeon_state *radeon_state_decref(struct radeon_state *state) +{ + unsigned i; + + if (state == NULL) + return NULL; + if (--state->refcount > 0) { + return NULL; + } + for (i = 0; i < state->nbo; i++) { + state->bo[i] = radeon_bo_decref(state->radeon, state->bo[i]); + } + free(state->immd); + free(state->states); + free(state->pm4); + memset(state, 0, sizeof(*state)); + free(state); + return NULL; } int radeon_state_replace_always(struct radeon_state *ostate, @@ -97,7 +156,6 @@ int radeon_state_pm4(struct radeon_state *state) return r; } state->pm4_crc = crc32(state->pm4, state->cpm4 * 4); - state->valid = 1; return 0; } -- cgit v1.2.3 From 27041d7cb3faeaed483538a228573466363ec1c7 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 6 Aug 2010 11:28:33 -0400 Subject: r600g: fix color format, indentation, defines Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_state.c | 2 +- src/gallium/drivers/r600/r600_state_inlines.h | 161 +++++++++++++------------- src/gallium/drivers/r600/r600d.h | 19 +-- 3 files changed, 93 insertions(+), 89 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 
e8871cd748..deb9bf3395 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -695,7 +695,7 @@ static struct radeon_state *r600_cb(struct r600_context *rctx, int cb) ntype = 0; desc = util_format_description(rtex->resource.base.b.format); if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) - ntype = NUM_FORMAT_SRGB; + ntype = V_0280A0_NUMBER_SRGB; format = r600_translate_colorformat(rtex->resource.base.b.format); swap = r600_translate_colorswap(rtex->resource.base.b.format); diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index fdc29386ae..8271ad19fb 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -147,117 +147,117 @@ static uint32_t r600_translate_colorswap(enum pipe_format format) { switch (format) { /* 8-bit buffers. */ - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_I8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R8_SNORM: - return SWAP_STD; + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8_SNORM: + return V_0280A0_SWAP_STD; /* 16-bit buffers. */ - case PIPE_FORMAT_B5G6R5_UNORM: - return SWAP_STD_REV; + case PIPE_FORMAT_B5G6R5_UNORM: + return V_0280A0_SWAP_STD_REV; - case PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_B5G5R5X1_UNORM: - return SWAP_ALT; + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B5G5R5X1_UNORM: + return V_0280A0_SWAP_ALT; - case PIPE_FORMAT_B4G4R4A4_UNORM: - case PIPE_FORMAT_B4G4R4X4_UNORM: - return SWAP_ALT; + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B4G4R4X4_UNORM: + return V_0280A0_SWAP_ALT; /* 32-bit buffers. 
*/ - case PIPE_FORMAT_A8B8G8R8_SRGB: - return SWAP_STD_REV; - case PIPE_FORMAT_B8G8R8A8_SRGB: - return SWAP_ALT; + case PIPE_FORMAT_A8B8G8R8_SRGB: + return V_0280A0_SWAP_STD_REV; + case PIPE_FORMAT_B8G8R8A8_SRGB: + return V_0280A0_SWAP_ALT; - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - return SWAP_ALT; + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + return V_0280A0_SWAP_ALT; - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_X8R8G8B8_UNORM: - return SWAP_ALT_REV; - case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_R8G8B8X8_UNORM: - return SWAP_STD; + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + return V_0280A0_SWAP_ALT_REV; + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + return V_0280A0_SWAP_STD; - case PIPE_FORMAT_A8B8G8R8_UNORM: - case PIPE_FORMAT_X8B8G8R8_UNORM: -// case PIPE_FORMAT_R8SG8SB8UX8U_NORM: - return SWAP_STD_REV; + case PIPE_FORMAT_A8B8G8R8_UNORM: + case PIPE_FORMAT_X8B8G8R8_UNORM: + // case PIPE_FORMAT_R8SG8SB8UX8U_NORM: + return V_0280A0_SWAP_STD_REV; case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - return SWAP_STD; + return V_0280A0_SWAP_STD; - case PIPE_FORMAT_R10G10B10A2_UNORM: - case PIPE_FORMAT_R10G10B10X2_SNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: - case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return SWAP_STD_REV; + case PIPE_FORMAT_R10G10B10A2_UNORM: + case PIPE_FORMAT_R10G10B10X2_SNORM: + case PIPE_FORMAT_B10G10R10A2_UNORM: + case PIPE_FORMAT_R10SG10SB10SA2U_NORM: + return V_0280A0_SWAP_STD_REV; /* 64-bit buffers. 
*/ - case PIPE_FORMAT_R16G16B16A16_UNORM: - case PIPE_FORMAT_R16G16B16A16_SNORM: -// return V_0280A0_COLOR_16_16_16_16; - case PIPE_FORMAT_R16G16B16A16_FLOAT: -// return V_0280A0_COLOR_16_16_16_16_FLOAT; + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + // return V_0280A0_COLOR_16_16_16_16; + case PIPE_FORMAT_R16G16B16A16_FLOAT: + // return V_0280A0_COLOR_16_16_16_16_FLOAT; /* 128-bit buffers. */ - case PIPE_FORMAT_R32G32B32A32_FLOAT: -// return V_0280A0_COLOR_32_32_32_32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + // return V_0280A0_COLOR_32_32_32_32_FLOAT; return 0; default: R600_ERR("unsupported colorswap format %d\n", format); return ~0; } return ~0; - } static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) { switch (format) { /* 8-bit buffers. */ - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_I8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R8_SNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8_SNORM: return V_0280A0_COLOR_8; /* 16-bit buffers. */ - case PIPE_FORMAT_B5G6R5_UNORM: + case PIPE_FORMAT_B5G6R5_UNORM: return V_0280A0_COLOR_5_6_5; - case PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_B5G5R5X1_UNORM: + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B5G5R5X1_UNORM: return V_0280A0_COLOR_1_5_5_5; - case PIPE_FORMAT_B4G4R4A4_UNORM: - case PIPE_FORMAT_B4G4R4X4_UNORM: + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B4G4R4X4_UNORM: return V_0280A0_COLOR_4_4_4_4; /* 32-bit buffers. 
*/ - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_X8R8G8B8_UNORM: - case PIPE_FORMAT_A8B8G8R8_UNORM: - case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_X8B8G8R8_UNORM: - case PIPE_FORMAT_R8G8B8X8_UNORM: - case PIPE_FORMAT_R8SG8SB8UX8U_NORM: - case PIPE_FORMAT_A8B8G8R8_SRGB: - case PIPE_FORMAT_B8G8R8A8_SRGB: + case PIPE_FORMAT_A8B8G8R8_SRGB: + case PIPE_FORMAT_A8B8G8R8_UNORM: + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_SRGB: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + case PIPE_FORMAT_R8SG8SB8UX8U_NORM: + case PIPE_FORMAT_X8B8G8R8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: return V_0280A0_COLOR_8_8_8_8; - case PIPE_FORMAT_R10G10B10A2_UNORM: - case PIPE_FORMAT_R10G10B10X2_SNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: - case PIPE_FORMAT_R10SG10SB10SA2U_NORM: + case PIPE_FORMAT_R10G10B10A2_UNORM: + case PIPE_FORMAT_R10G10B10X2_SNORM: + case PIPE_FORMAT_B10G10R10A2_UNORM: + case PIPE_FORMAT_R10SG10SB10SA2U_NORM: return V_0280A0_COLOR_10_10_10_2; case PIPE_FORMAT_Z24X8_UNORM: @@ -265,25 +265,24 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) return V_0280A0_COLOR_24_8; /* 64-bit buffers. */ - case PIPE_FORMAT_R16G16B16A16_UNORM: - case PIPE_FORMAT_R16G16B16A16_SNORM: + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: return V_0280A0_COLOR_16_16_16_16; - case PIPE_FORMAT_R16G16B16A16_FLOAT: + case PIPE_FORMAT_R16G16B16A16_FLOAT: return V_0280A0_COLOR_16_16_16_16_FLOAT; - case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R32G32_FLOAT: return V_0280A0_COLOR_32_32_FLOAT; /* 128-bit buffers. 
*/ - case PIPE_FORMAT_R32G32B32_FLOAT: - case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_FLOAT: return V_0280A0_COLOR_32_32_32_32_FLOAT; /* YUV buffers. */ - case PIPE_FORMAT_UYVY: -// return R300_COLOR_FORMAT_YVYU; - case PIPE_FORMAT_YUYV: -// return R300_COLOR_FORMAT_VYUY; - default: + case PIPE_FORMAT_UYVY: + case PIPE_FORMAT_YUYV: + default: + R600_ERR("unsupported color format %d\n", format); return ~0; /* Unsupported. */ } } diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index f9cad93185..fb71b1e5d1 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -209,12 +209,24 @@ #define S_0280A0_NUMBER_TYPE(x) (((x) & 0x7) << 12) #define G_0280A0_NUMBER_TYPE(x) (((x) >> 12) & 0x7) #define C_0280A0_NUMBER_TYPE 0xFFFF8FFF +#define V_0280A0_NUMBER_UNORM 0x00000000 +#define V_0280A0_NUMBER_SNORM 0x00000001 +#define V_0280A0_NUMBER_USCALED 0x00000002 +#define V_0280A0_NUMBER_SSCALED 0x00000003 +#define V_0280A0_NUMBER_UINT 0x00000004 +#define V_0280A0_NUMBER_SINT 0x00000005 +#define V_0280A0_NUMBER_SRGB 0x00000006 +#define V_0280A0_NUMBER_FLOAT 0x00000007 #define S_0280A0_READ_SIZE(x) (((x) & 0x1) << 15) #define G_0280A0_READ_SIZE(x) (((x) >> 15) & 0x1) #define C_0280A0_READ_SIZE 0xFFFF7FFF #define S_0280A0_COMP_SWAP(x) (((x) & 0x3) << 16) #define G_0280A0_COMP_SWAP(x) (((x) >> 16) & 0x3) #define C_0280A0_COMP_SWAP 0xFFFCFFFF +#define V_0280A0_SWAP_STD 0x00000000 +#define V_0280A0_SWAP_ALT 0x00000001 +#define V_0280A0_SWAP_STD_REV 0x00000002 +#define V_0280A0_SWAP_ALT_REV 0x00000003 #define S_0280A0_TILE_MODE(x) (((x) & 0x3) << 18) #define G_0280A0_TILE_MODE(x) (((x) >> 18) & 0x3) #define C_0280A0_TILE_MODE 0xFFF3FFFF @@ -1169,11 +1181,4 @@ #define G_0286D4_PNT_SPRITE_TOP_1(x) (((x) >> 14) & 0x1) #define C_0286D4_PNT_SPRITE_TOP_1 0xFFFFBFFF -/* temporary swap */ -#define SWAP_STD 0 -#define SWAP_ALT 1 -#define SWAP_STD_REV 2 -#define SWAP_ALT_REV 3 
- -#define NUM_FORMAT_SRGB 6 #endif -- cgit v1.2.3 From 6ae39f6dca8f0968902642f04f1deb6f573edb6d Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Sat, 7 Aug 2010 02:14:50 -0600 Subject: draw: Assert that only the first vetex may have flags set. 642d5ba79abc6a231a5fdabb3454b9b082b0d7f8 removed flags masking for vertices other than the first one. Add assertions to be on the safe side. --- src/gallium/auxiliary/draw/draw_pipe.c | 36 ++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 13 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 070ac803c8..58995e0724 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -175,21 +175,31 @@ static void do_triangle( struct draw_context *draw, * higher bits of i0. Otherwise, flags do not matter. */ -#define TRIANGLE(flags,i0,i1,i2) \ - do_triangle( draw, \ - i0, /* flags */ \ - verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1), \ - verts + stride * (i2) ) - -#define LINE(flags,i0,i1) \ - do_line( draw, \ - i0, /* flags */ \ - verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1) ) +#define TRIANGLE(flags,i0,i1,i2) \ + do { \ + assert(!((i1) & DRAW_PIPE_FLAG_MASK)); \ + assert(!((i2) & DRAW_PIPE_FLAG_MASK)); \ + do_triangle( draw, \ + i0, /* flags */ \ + verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (i1), \ + verts + stride * (i2) ); \ + } while (0) + +#define LINE(flags,i0,i1) \ + do { \ + assert(!((i1) & DRAW_PIPE_FLAG_MASK)); \ + do_line( draw, \ + i0, /* flags */ \ + verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (i1) ); \ + } while (0) #define POINT(i0) \ - do_point( draw, verts + stride * (i0) ) + do { \ + assert(!((i0) & DRAW_PIPE_FLAG_MASK)); \ + do_point( draw, verts + stride * (i0) ); \ + } while (0) #define GET_ELT(idx) (elts[idx]) -- cgit v1.2.3 From 600cd858d446bc1698a9b28f714f3fd6145316fb 
Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 6 Aug 2010 22:50:09 +0800 Subject: draw: Fix draw_pt_split_prim for primitives with adjacency. Some primitives with adjacency have their "incr" wrong. --- src/gallium/auxiliary/draw/draw_pt_util.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c index 3236d38e6a..182a597cca 100644 --- a/src/gallium/auxiliary/draw/draw_pt_util.c +++ b/src/gallium/auxiliary/draw/draw_pt_util.c @@ -53,7 +53,7 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr) break; case PIPE_PRIM_LINES_ADJACENCY: *first = 4; - *incr = 2; + *incr = 4; break; case PIPE_PRIM_LINE_STRIP_ADJACENCY: *first = 4; @@ -65,7 +65,7 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr) break; case PIPE_PRIM_TRIANGLES_ADJACENCY: *first = 6; - *incr = 3; + *incr = 6; break; case PIPE_PRIM_TRIANGLE_STRIP: case PIPE_PRIM_TRIANGLE_FAN: @@ -75,7 +75,7 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr) break; case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: *first = 6; - *incr = 1; + *incr = 2; break; case PIPE_PRIM_QUADS: *first = 4; -- cgit v1.2.3 From e34c52da87990b1ec64602a20418cdb274deefa9 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Sat, 7 Aug 2010 00:50:32 +0800 Subject: draw: Add an assertion to varray's version of trim(). Assert that "first" is always smaller than "count" and add reasoning. It would be better to simply fix trim(), but it is used in tight loops right now. 
--- src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h index a292346be9..55e43b2a71 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h @@ -1,6 +1,11 @@ static unsigned trim( unsigned count, unsigned first, unsigned incr ) { - return count - (count - first) % incr; + /* + * count either has been trimmed in draw_pt_arrays or is set to + * (driver)_fetch_max which is hopefully always larger than first. + */ + assert(count >= first); + return count - (count - first) % incr; } static void FUNC(struct draw_pt_front_end *frontend, -- cgit v1.2.3 From 69d450856aedda9c46739c64eac82cf14447426e Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Fri, 6 Aug 2010 11:45:52 -0700 Subject: targets/egl: Fix build by including missing headers. 
--- src/gallium/targets/egl/st_GLESv1_CM.c | 1 + src/gallium/targets/egl/st_GLESv2.c | 1 + 2 files changed, 2 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/targets/egl/st_GLESv1_CM.c b/src/gallium/targets/egl/st_GLESv1_CM.c index 0c8de8992f..c1652d5131 100644 --- a/src/gallium/targets/egl/st_GLESv1_CM.c +++ b/src/gallium/targets/egl/st_GLESv1_CM.c @@ -1,3 +1,4 @@ +#include "state_tracker/st_api.h" #include "state_tracker/st_gl_api.h" PUBLIC struct st_api * diff --git a/src/gallium/targets/egl/st_GLESv2.c b/src/gallium/targets/egl/st_GLESv2.c index 87b3e65e23..9c26989008 100644 --- a/src/gallium/targets/egl/st_GLESv2.c +++ b/src/gallium/targets/egl/st_GLESv2.c @@ -1,3 +1,4 @@ +#include "state_tracker/st_api.h" #include "state_tracker/st_gl_api.h" PUBLIC struct st_api * -- cgit v1.2.3 From c3ad060488ffd98f1c6dc9127b46324c5201f434 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 6 Aug 2010 16:10:25 -0400 Subject: r600g: finish multi target rendering support Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_context.c | 6 +- src/gallium/drivers/r600/r600_shader.c | 52 ++++++++++------- src/gallium/drivers/r600/r600_shader.h | 9 +++ src/gallium/drivers/r600/r600_state.c | 3 +- src/gallium/drivers/r600/radeon.h | 30 +++++++--- src/gallium/winsys/r600/drm/r600_states.h | 96 +++++++++++++++++++++++++++++-- 6 files changed, 158 insertions(+), 38 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index ae1780a1d4..29dc93bae6 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -47,14 +47,16 @@ void r600_flush(struct pipe_context *ctx, unsigned flags, struct r600_context *rctx = r600_context(ctx); struct r600_screen *rscreen = rctx->screen; static int dc = 0; + char dname[256]; if (radeon_ctx_pm4(rctx->ctx)) return; /* FIXME dumping should be removed once shader support instructions * without throwing bad 
code */ - if (!dc) - radeon_ctx_dump_bof(rctx->ctx, "gallium.bof"); + sprintf(dname, "gallium-%08d.bof", dc); + if (dc < 10) + radeon_ctx_dump_bof(rctx->ctx, dname); #if 1 radeon_ctx_submit(rctx->ctx); #endif diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index dc8d4cb315..33dff97d22 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -339,7 +339,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s { struct tgsi_full_immediate *immediate; struct r600_shader_ctx ctx; - struct r600_bc_output output; + struct r600_bc_output output[32]; unsigned opcode; int i, r = 0, pos0; @@ -418,33 +418,37 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s } /* export output */ for (i = 0, pos0 = 0; i < shader->noutput; i++) { - memset(&output, 0, sizeof(struct r600_bc_output)); - output.gpr = shader->output[i].gpr; - output.elem_size = 3; - output.swizzle_x = 0; - output.swizzle_y = 1; - output.swizzle_z = 2; - output.swizzle_w = 3; - output.barrier = 1; - output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; - output.array_base = i - pos0; - output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE; + memset(&output[i], 0, sizeof(struct r600_bc_output)); + output[i].gpr = shader->output[i].gpr; + output[i].elem_size = 3; + output[i].swizzle_x = 0; + output[i].swizzle_y = 1; + output[i].swizzle_z = 2; + output[i].swizzle_w = 3; + output[i].barrier = 1; + output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; + output[i].array_base = i - pos0; + output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT; switch (ctx.type == TGSI_PROCESSOR_VERTEX) { case TGSI_PROCESSOR_VERTEX: + shader->output[i].type = r600_export_parameter; if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { - output.array_base = 60; - output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + shader->output[i].type = 
r600_export_position; + output[i].array_base = 60; + output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; /* position doesn't count in array_base */ pos0 = 1; } break; case TGSI_PROCESSOR_FRAGMENT: + shader->output[i].type = r600_export_framebuffer; if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { - output.array_base = 0; - output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + output[i].array_base = 0; + output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { - output.array_base = 61; - output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + shader->output[i].type = r600_export_position; + output[i].array_base = 61; + output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else { R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); r = -EINVAL; @@ -457,9 +461,17 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s goto out_err; } if (i == (shader->noutput - 1)) { - output.end_of_program = 1; + output[i].end_of_program = 1; } - r = r600_bc_add_output(ctx.bc, &output); + } + for (i = shader->noutput - 1, shader->output_done = 0; i >= 0; i--) { + if (!(shader->output_done & (1 << output[i].type))) { + shader->output_done |= (1 << output[i].type); + output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE; + } + } + for (i = 0; i < shader->noutput; i++) { + r = r600_bc_add_output(ctx.bc, &output[i]); if (r) goto out_err; } diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index ee0381e8bd..15562c19a5 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -25,9 +25,17 @@ #include "r600_asm.h" +enum r600_export_type { + r600_export_position = 0, + r600_export_parameter, + r600_export_framebuffer, +}; + struct r600_shader_io { unsigned name; unsigned gpr; + unsigned done; + unsigned type; int sid; unsigned interpolate; }; @@ -41,6 
+49,7 @@ struct r600_shader { struct r600_shader_io input[32]; struct r600_shader_io output[32]; enum radeon_family family; + unsigned output_done; }; #endif diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index deb9bf3395..ef6c1bedeb 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -675,9 +675,8 @@ static struct radeon_state *r600_cb(struct r600_context *rctx, int cb) unsigned color_info; unsigned format, swap, ntype; const struct util_format_description *desc; - int id = R600_CB0 + cb; - rstate = radeon_state(rscreen->rw, R600_CB0_TYPE, id); + rstate = radeon_state(rscreen->rw, R600_CB0_TYPE + cb, R600_CB0 + cb); if (rstate == NULL) return NULL; rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; diff --git a/src/gallium/drivers/r600/radeon.h b/src/gallium/drivers/r600/radeon.h index 3a8405f9b4..00cff41b4f 100644 --- a/src/gallium/drivers/r600/radeon.h +++ b/src/gallium/drivers/r600/radeon.h @@ -160,8 +160,8 @@ void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file); * R600/R700 */ -#define R600_NSTATE 1273 -#define R600_NTYPE 25 +#define R600_NSTATE 1280 +#define R600_NTYPE 32 #define R600_CONFIG 0 #define R600_CONFIG_TYPE 0 @@ -207,12 +207,26 @@ void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file); #define R600_GS_SAMPLER_BORDER_TYPE 20 #define R600_CB0 1269 #define R600_CB0_TYPE 21 -#define R600_DB 1270 -#define R600_DB_TYPE 22 -#define R600_VGT 1271 -#define R600_VGT_TYPE 23 -#define R600_DRAW 1272 -#define R600_DRAW_TYPE 24 +#define R600_CB1 1270 +#define R600_CB1_TYPE 22 +#define R600_CB2 1271 +#define R600_CB2_TYPE 23 +#define R600_CB3 1272 +#define R600_CB3_TYPE 24 +#define R600_CB4 1273 +#define R600_CB4_TYPE 25 +#define R600_CB5 1274 +#define R600_CB5_TYPE 26 +#define R600_CB6 1275 +#define R600_CB6_TYPE 27 +#define R600_CB7 1276 +#define R600_CB7_TYPE 28 +#define R600_DB 1277 +#define R600_DB_TYPE 29 +#define R600_VGT 1278 
+#define R600_VGT_TYPE 30 +#define R600_DRAW 1279 +#define R600_DRAW_TYPE 31 /* R600_CONFIG */ #define R600_CONFIG__SQ_CONFIG 0 #define R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1 1 diff --git a/src/gallium/winsys/r600/drm/r600_states.h b/src/gallium/winsys/r600/drm/r600_states.h index 5896df21b2..e40c77d8f6 100644 --- a/src/gallium/winsys/r600/drm/r600_states.h +++ b/src/gallium/winsys/r600/drm/r600_states.h @@ -372,6 +372,76 @@ static const struct radeon_register R600_CB0_names[] = { {0x00028100, 0, 0, "CB_COLOR0_MASK"}, }; +static const struct radeon_register R600_CB1_names[] = { + {0x00028044, 1, 0, "CB_COLOR1_BASE"}, + {0x000280A4, 0, 0, "CB_COLOR1_INFO"}, + {0x00028064, 0, 0, "CB_COLOR1_SIZE"}, + {0x00028084, 0, 0, "CB_COLOR1_VIEW"}, + {0x000280E4, 1, 1, "CB_COLOR1_FRAG"}, + {0x000280C4, 1, 2, "CB_COLOR1_TILE"}, + {0x00028104, 0, 0, "CB_COLOR1_MASK"}, +}; + +static const struct radeon_register R600_CB2_names[] = { + {0x00028048, 1, 0, "CB_COLOR2_BASE"}, + {0x000280A8, 0, 0, "CB_COLOR2_INFO"}, + {0x00028068, 0, 0, "CB_COLOR2_SIZE"}, + {0x00028088, 0, 0, "CB_COLOR2_VIEW"}, + {0x000280E8, 1, 1, "CB_COLOR2_FRAG"}, + {0x000280C8, 1, 2, "CB_COLOR2_TILE"}, + {0x00028108, 0, 0, "CB_COLOR2_MASK"}, +}; + +static const struct radeon_register R600_CB3_names[] = { + {0x0002804C, 1, 0, "CB_COLOR3_BASE"}, + {0x000280AC, 0, 0, "CB_COLOR3_INFO"}, + {0x0002806C, 0, 0, "CB_COLOR3_SIZE"}, + {0x0002808C, 0, 0, "CB_COLOR3_VIEW"}, + {0x000280EC, 1, 1, "CB_COLOR3_FRAG"}, + {0x000280CC, 1, 2, "CB_COLOR3_TILE"}, + {0x0002810C, 0, 0, "CB_COLOR3_MASK"}, +}; + +static const struct radeon_register R600_CB4_names[] = { + {0x00028050, 1, 0, "CB_COLOR4_BASE"}, + {0x000280B0, 0, 0, "CB_COLOR4_INFO"}, + {0x00028070, 0, 0, "CB_COLOR4_SIZE"}, + {0x00028090, 0, 0, "CB_COLOR4_VIEW"}, + {0x000280F0, 1, 1, "CB_COLOR4_FRAG"}, + {0x000280D0, 1, 2, "CB_COLOR4_TILE"}, + {0x00028110, 0, 0, "CB_COLOR4_MASK"}, +}; + +static const struct radeon_register R600_CB5_names[] = { + {0x00028054, 1, 0, "CB_COLOR5_BASE"}, + 
{0x000280B4, 0, 0, "CB_COLOR5_INFO"}, + {0x00028074, 0, 0, "CB_COLOR5_SIZE"}, + {0x00028094, 0, 0, "CB_COLOR5_VIEW"}, + {0x000280F4, 1, 1, "CB_COLOR5_FRAG"}, + {0x000280D4, 1, 2, "CB_COLOR5_TILE"}, + {0x00028114, 0, 0, "CB_COLOR5_MASK"}, +}; + +static const struct radeon_register R600_CB6_names[] = { + {0x00028058, 1, 0, "CB_COLOR6_BASE"}, + {0x000280B8, 0, 0, "CB_COLOR6_INFO"}, + {0x00028078, 0, 0, "CB_COLOR6_SIZE"}, + {0x00028098, 0, 0, "CB_COLOR6_VIEW"}, + {0x000280F8, 1, 1, "CB_COLOR6_FRAG"}, + {0x000280D8, 1, 2, "CB_COLOR6_TILE"}, + {0x00028118, 0, 0, "CB_COLOR6_MASK"}, +}; + +static const struct radeon_register R600_CB7_names[] = { + {0x0002805C, 1, 0, "CB_COLOR7_BASE"}, + {0x000280BC, 0, 0, "CB_COLOR7_INFO"}, + {0x0002807C, 0, 0, "CB_COLOR7_SIZE"}, + {0x0002809C, 0, 0, "CB_COLOR7_VIEW"}, + {0x000280FC, 1, 1, "CB_COLOR7_FRAG"}, + {0x000280DC, 1, 2, "CB_COLOR7_TILE"}, + {0x0002811C, 0, 0, "CB_COLOR7_MASK"}, +}; + static const struct radeon_register R600_DB_names[] = { {0x0002800C, 1, 0, "DB_DEPTH_BASE"}, {0x00028000, 0, 0, "DB_DEPTH_SIZE"}, @@ -425,9 +495,16 @@ static struct radeon_type R600_types[] = { { 128, 1233, 0x0000A600, 0x0000A720, 0x0010, 0, "R600_VS_SAMPLER_BORDER", 4, r600_state_pm4_generic, R600_VS_SAMPLER_BORDER_names}, { 128, 1251, 0x0000A800, 0x0000A920, 0x0010, 0, "R600_GS_SAMPLER_BORDER", 4, r600_state_pm4_generic, R600_GS_SAMPLER_BORDER_names}, { 128, 1269, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB0", 7, r600_state_pm4_cb0, R600_CB0_names}, - { 128, 1270, 0x00000000, 0x00000000, 0x0000, 0, "R600_DB", 6, r600_state_pm4_db, R600_DB_names}, - { 128, 1271, 0x00000000, 0x00000000, 0x0000, 0, "R600_VGT", 11, r600_state_pm4_vgt, R600_VGT_names}, - { 128, 1272, 0x00000000, 0x00000000, 0x0000, 0, "R600_DRAW", 4, r600_state_pm4_draw, R600_DRAW_names}, + { 128, 1270, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB1", 7, r600_state_pm4_cb0, R600_CB1_names}, + { 128, 1271, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB2", 7, r600_state_pm4_cb0, 
R600_CB2_names}, + { 128, 1272, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB3", 7, r600_state_pm4_cb0, R600_CB3_names}, + { 128, 1273, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB4", 7, r600_state_pm4_cb0, R600_CB4_names}, + { 128, 1274, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB5", 7, r600_state_pm4_cb0, R600_CB5_names}, + { 128, 1275, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB6", 7, r600_state_pm4_cb0, R600_CB6_names}, + { 128, 1276, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB7", 7, r600_state_pm4_cb0, R600_CB7_names}, + { 128, 1277, 0x00000000, 0x00000000, 0x0000, 0, "R600_DB", 6, r600_state_pm4_db, R600_DB_names}, + { 128, 1278, 0x00000000, 0x00000000, 0x0000, 0, "R600_VGT", 11, r600_state_pm4_vgt, R600_VGT_names}, + { 128, 1279, 0x00000000, 0x00000000, 0x0000, 0, "R600_DRAW", 4, r600_state_pm4_draw, R600_DRAW_names}, }; static struct radeon_type R700_types[] = { @@ -453,9 +530,16 @@ static struct radeon_type R700_types[] = { { 128, 1233, 0x0000A600, 0x0000A720, 0x0010, 0, "R600_VS_SAMPLER_BORDER", 4, r600_state_pm4_generic, R600_VS_SAMPLER_BORDER_names}, { 128, 1251, 0x0000A800, 0x0000A920, 0x0010, 0, "R600_GS_SAMPLER_BORDER", 4, r600_state_pm4_generic, R600_GS_SAMPLER_BORDER_names}, { 128, 1269, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB0", 7, r700_state_pm4_cb0, R600_CB0_names}, - { 128, 1270, 0x00000000, 0x00000000, 0x0000, 0, "R600_DB", 6, r700_state_pm4_db, R600_DB_names}, - { 128, 1271, 0x00000000, 0x00000000, 0x0000, 0, "R600_VGT", 11, r600_state_pm4_vgt, R600_VGT_names}, - { 128, 1272, 0x00000000, 0x00000000, 0x0000, 0, "R600_DRAW", 4, r600_state_pm4_draw, R600_DRAW_names}, + { 128, 1270, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB1", 7, r600_state_pm4_cb0, R600_CB1_names}, + { 128, 1271, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB2", 7, r600_state_pm4_cb0, R600_CB2_names}, + { 128, 1272, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB3", 7, r600_state_pm4_cb0, R600_CB3_names}, + { 128, 1273, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB4", 7, 
r600_state_pm4_cb0, R600_CB4_names}, + { 128, 1274, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB5", 7, r600_state_pm4_cb0, R600_CB5_names}, + { 128, 1275, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB6", 7, r600_state_pm4_cb0, R600_CB6_names}, + { 128, 1276, 0x00000000, 0x00000000, 0x0000, 0, "R600_CB7", 7, r600_state_pm4_cb0, R600_CB7_names}, + { 128, 1277, 0x00000000, 0x00000000, 0x0000, 0, "R600_DB", 6, r700_state_pm4_db, R600_DB_names}, + { 128, 1278, 0x00000000, 0x00000000, 0x0000, 0, "R600_VGT", 11, r600_state_pm4_vgt, R600_VGT_names}, + { 128, 1279, 0x00000000, 0x00000000, 0x0000, 0, "R600_DRAW", 4, r600_state_pm4_draw, R600_DRAW_names}, }; #endif -- cgit v1.2.3 From 14e9fbee1cef281c6849a5f2a6d2cc66bfd4b3fd Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 6 Aug 2010 15:09:41 -0600 Subject: gallium: remove stray semicolons --- src/gallium/auxiliary/gallivm/lp_bld_init.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_sanity.c | 2 +- src/gallium/auxiliary/util/u_cpu_detect.c | 2 +- src/gallium/drivers/llvmpipe/lp_context.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index ef0888079c..60d8bcfa55 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -46,7 +46,7 @@ static const struct debug_named_value lp_bld_debug_flags[] = { DEBUG_NAMED_VALUE_END }; -DEBUG_GET_ONCE_FLAGS_OPTION(gallivm_debug, "GALLIVM_DEBUG", lp_bld_debug_flags, 0); +DEBUG_GET_ONCE_FLAGS_OPTION(gallivm_debug, "GALLIVM_DEBUG", lp_bld_debug_flags, 0) #endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 9e02040f6c..287ee006cf 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -34,7 +34,7 @@ #include "tgsi_iterate.h" -DEBUG_GET_ONCE_BOOL_OPTION(print_sanity, "TGSI_PRINT_SANITY", TRUE); 
+DEBUG_GET_ONCE_BOOL_OPTION(print_sanity, "TGSI_PRINT_SANITY", TRUE) typedef struct { diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c index 6f38d22285..b1a8c75b99 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -73,7 +73,7 @@ #endif -DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", TRUE); +DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", TRUE) struct util_cpu_caps util_cpu_caps; diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 28793682ed..7543bd7b2b 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -47,7 +47,7 @@ #include "lp_setup.h" -DEBUG_GET_ONCE_BOOL_OPTION(lp_no_rast, "LP_NO_RAST", FALSE); +DEBUG_GET_ONCE_BOOL_OPTION(lp_no_rast, "LP_NO_RAST", FALSE) static void llvmpipe_destroy( struct pipe_context *pipe ) -- cgit v1.2.3 From b474478f206c6d81af78696d3d5ce156d4d413d7 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 6 Aug 2010 17:12:37 -0400 Subject: r600g: really fix multi target support Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_context.c | 20 +------------------- src/gallium/drivers/r600/r600_shader.c | 19 ++++++++----------- src/gallium/drivers/r600/r600_shader.h | 8 -------- src/gallium/drivers/r600/r600_state.c | 25 +++++++++++++++++-------- 4 files changed, 26 insertions(+), 46 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index 29dc93bae6..052eb1cd6d 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -55,7 +55,7 @@ void r600_flush(struct pipe_context *ctx, unsigned flags, * without throwing bad code */ sprintf(dname, "gallium-%08d.bof", dc); - if (dc < 10) + if (dc < 1) radeon_ctx_dump_bof(rctx->ctx, dname); #if 1 radeon_ctx_submit(rctx->ctx); @@ -204,24 
+204,6 @@ static void r600_init_config(struct r600_context *rctx) num_es_stack_entries = 0; break; } - printf("ps_prio : %d\n", ps_prio); - printf("vs_prio : %d\n", vs_prio); - printf("gs_prio : %d\n", gs_prio); - printf("es_prio : %d\n", es_prio); - printf("num_ps_gprs : %d\n", num_ps_gprs); - printf("num_vs_gprs : %d\n", num_vs_gprs); - printf("num_gs_gprs : %d\n", num_gs_gprs); - printf("num_es_gprs : %d\n", num_es_gprs); - printf("num_temp_gprs : %d\n", num_temp_gprs); - printf("num_ps_threads : %d\n", num_ps_threads); - printf("num_vs_threads : %d\n", num_vs_threads); - printf("num_gs_threads : %d\n", num_gs_threads); - printf("num_es_threads : %d\n", num_es_threads); - printf("num_ps_stack_entries : %d\n", num_ps_stack_entries); - printf("num_vs_stack_entries : %d\n", num_vs_stack_entries); - printf("num_gs_stack_entries : %d\n", num_gs_stack_entries); - printf("num_es_stack_entries : %d\n", num_es_stack_entries); - rctx->hw_states.config = radeon_state(rctx->rw, R600_CONFIG_TYPE, R600_CONFIG); rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] = 0x00000000; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 33dff97d22..8a778f5fd6 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -105,8 +105,8 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_screen *rscreen = r600_screen(ctx->screen); int r; -fprintf(stderr, "--------------------------------------------------------------\n"); -tgsi_dump(tokens, 0); +//fprintf(stderr, "--------------------------------------------------------------\n"); +//tgsi_dump(tokens, 0); if (rpshader == NULL) return -ENOMEM; rpshader->shader.family = radeon_get_family(rscreen->rw); @@ -120,7 +120,7 @@ tgsi_dump(tokens, 0); R600_ERR("building bytecode failed !\n"); return r; } -fprintf(stderr, "______________________________________________________________\n"); +//fprintf(stderr, 
"______________________________________________________________\n"); return 0; } @@ -340,6 +340,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s struct tgsi_full_immediate *immediate; struct r600_shader_ctx ctx; struct r600_bc_output output[32]; + unsigned output_done; unsigned opcode; int i, r = 0, pos0; @@ -431,9 +432,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT; switch (ctx.type == TGSI_PROCESSOR_VERTEX) { case TGSI_PROCESSOR_VERTEX: - shader->output[i].type = r600_export_parameter; if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { - shader->output[i].type = r600_export_position; output[i].array_base = 60; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; /* position doesn't count in array_base */ @@ -441,12 +440,10 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s } break; case TGSI_PROCESSOR_FRAGMENT: - shader->output[i].type = r600_export_framebuffer; if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { - output[i].array_base = 0; + output[i].array_base = shader->output[i].sid; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { - shader->output[i].type = r600_export_position; output[i].array_base = 61; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else { @@ -464,9 +461,9 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[i].end_of_program = 1; } } - for (i = shader->noutput - 1, shader->output_done = 0; i >= 0; i--) { - if (!(shader->output_done & (1 << output[i].type))) { - shader->output_done |= (1 << output[i].type); + for (i = shader->noutput - 1, output_done = 0; i >= 0; i--) { + if (!(output_done & (1 << output[i].type))) { + output_done |= (1 << output[i].type); output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE; } } diff 
--git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 15562c19a5..2ee7780ead 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -25,17 +25,10 @@ #include "r600_asm.h" -enum r600_export_type { - r600_export_position = 0, - r600_export_parameter, - r600_export_framebuffer, -}; - struct r600_shader_io { unsigned name; unsigned gpr; unsigned done; - unsigned type; int sid; unsigned interpolate; }; @@ -49,7 +42,6 @@ struct r600_shader { struct r600_shader_io input[32]; struct r600_shader_io output[32]; enum radeon_family family; - unsigned output_done; }; #endif diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index ef6c1bedeb..223f2f3900 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1205,7 +1205,7 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) struct r600_screen *rscreen = rctx->screen; struct radeon_state *rstate; const struct pipe_blend_state *pbs = &rctx->blend->state.blend; - int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs; + int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs; uint32_t color_control, target_mask, shader_mask; int i; @@ -1214,20 +1214,29 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) color_control = S_028808_PER_MRT_BLEND(1); for (i = 0; i < nr_cbufs; i++) { - shader_mask |= 0xf << i; + shader_mask |= 0xf << (i * 4); } if (pbs->logicop_enable) { color_control |= (pbs->logicop_func) << 16; - } else + } else { color_control |= (0xcc << 16); + } - for (i = 0; i < 8; i++) { - if (pbs->rt[i].blend_enable) { - color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); + if (pbs->independent_blend_enable) { + for (i = 0; i < 8; i++) { + if (pbs->rt[i].blend_enable) { + color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); + } + target_mask |= (pbs->rt[i].colormask << (4 * i)); + } + } else { + for (i = 0; i < 
8; i++) { + if (pbs->rt[0].blend_enable) { + color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); + } + target_mask |= (pbs->rt[0].colormask << (4 * i)); } - target_mask |= (pbs->rt[i].colormask << (4 * i)); - } rstate = radeon_state(rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = shader_mask; -- cgit v1.2.3 From 32251c34f06ef91759fa75271ce724a06483cc42 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 6 Aug 2010 17:22:45 -0400 Subject: r600g: fix rendering, only enable target we write too Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 223f2f3900..ff621084d4 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1224,14 +1224,14 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) } if (pbs->independent_blend_enable) { - for (i = 0; i < 8; i++) { + for (i = 0; i < nr_cbufs; i++) { if (pbs->rt[i].blend_enable) { color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); } target_mask |= (pbs->rt[i].colormask << (4 * i)); } } else { - for (i = 0; i < 8; i++) { + for (i = 0; i < nr_cbufs; i++) { if (pbs->rt[0].blend_enable) { color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i); } -- cgit v1.2.3 From d9f72b9f909b32ff0adacf939c75eb2924ed133b Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 6 Aug 2010 17:27:36 -0400 Subject: r600g: add PA_CL_CLIP_CNTL definition Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600d.h | 55 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index fb71b1e5d1..af93731550 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -352,6 +352,61 @@ #define 
S_028808_ROP3(x) (((x) & 0xFF) << 16) #define G_028808_ROP3(x) (((x) >> 16) & 0xFF) #define C_028808_ROP3 0xFF00FFFF +#define R_028810_PA_CL_CLIP_CNTL 0x028810 +#define S_028810_UCP_ENA_0(x) (((x) & 0x1) << 0) +#define G_028810_UCP_ENA_0(x) (((x) >> 0) & 0x1) +#define C_028810_UCP_ENA_0 0xFFFFFFFE +#define S_028810_UCP_ENA_1(x) (((x) & 0x1) << 1) +#define G_028810_UCP_ENA_1(x) (((x) >> 1) & 0x1) +#define C_028810_UCP_ENA_1 0xFFFFFFFD +#define S_028810_UCP_ENA_2(x) (((x) & 0x1) << 2) +#define G_028810_UCP_ENA_2(x) (((x) >> 2) & 0x1) +#define C_028810_UCP_ENA_2 0xFFFFFFFB +#define S_028810_UCP_ENA_3(x) (((x) & 0x1) << 3) +#define G_028810_UCP_ENA_3(x) (((x) >> 3) & 0x1) +#define C_028810_UCP_ENA_3 0xFFFFFFF7 +#define S_028810_UCP_ENA_4(x) (((x) & 0x1) << 4) +#define G_028810_UCP_ENA_4(x) (((x) >> 4) & 0x1) +#define C_028810_UCP_ENA_4 0xFFFFFFEF +#define S_028810_UCP_ENA_5(x) (((x) & 0x1) << 5) +#define G_028810_UCP_ENA_5(x) (((x) >> 5) & 0x1) +#define C_028810_UCP_ENA_5 0xFFFFFFDF +#define S_028810_PS_UCP_Y_SCALE_NEG(x) (((x) & 0x1) << 13) +#define G_028810_PS_UCP_Y_SCALE_NEG(x) (((x) >> 13) & 0x1) +#define C_028810_PS_UCP_Y_SCALE_NEG 0xFFFFDFFF +#define S_028810_PS_UCP_MODE(x) (((x) & 0x3) << 14) +#define G_028810_PS_UCP_MODE(x) (((x) >> 14) & 0x3) +#define C_028810_PS_UCP_MODE 0xFFFF3FFF +#define S_028810_CLIP_DISABLE(x) (((x) & 0x1) << 16) +#define G_028810_CLIP_DISABLE(x) (((x) >> 16) & 0x1) +#define C_028810_CLIP_DISABLE 0xFFFEFFFF +#define S_028810_UCP_CULL_ONLY_ENA(x) (((x) & 0x1) << 17) +#define G_028810_UCP_CULL_ONLY_ENA(x) (((x) >> 17) & 0x1) +#define C_028810_UCP_CULL_ONLY_ENA 0xFFFDFFFF +#define S_028810_BOUNDARY_EDGE_FLAG_ENA(x) (((x) & 0x1) << 18) +#define G_028810_BOUNDARY_EDGE_FLAG_ENA(x) (((x) >> 18) & 0x1) +#define C_028810_BOUNDARY_EDGE_FLAG_ENA 0xFFFBFFFF +#define S_028810_DX_CLIP_SPACE_DEF(x) (((x) & 0x1) << 19) +#define G_028810_DX_CLIP_SPACE_DEF(x) (((x) >> 19) & 0x1) +#define C_028810_DX_CLIP_SPACE_DEF 0xFFF7FFFF +#define 
S_028810_DIS_CLIP_ERR_DETECT(x) (((x) & 0x1) << 20) +#define G_028810_DIS_CLIP_ERR_DETECT(x) (((x) >> 20) & 0x1) +#define C_028810_DIS_CLIP_ERR_DETECT 0xFFEFFFFF +#define S_028810_VTX_KILL_OR(x) (((x) & 0x1) << 21) +#define G_028810_VTX_KILL_OR(x) (((x) >> 21) & 0x1) +#define C_028810_VTX_KILL_OR 0xFFDFFFFF +#define S_028810_DX_LINEAR_ATTR_CLIP_ENA(x) (((x) & 0x1) << 24) +#define G_028810_DX_LINEAR_ATTR_CLIP_ENA(x) (((x) >> 24) & 0x1) +#define C_028810_DX_LINEAR_ATTR_CLIP_ENA 0xFEFFFFFF +#define S_028810_VTE_VPORT_PROVOKE_DISABLE(x) (((x) & 0x1) << 25) +#define G_028810_VTE_VPORT_PROVOKE_DISABLE(x) (((x) >> 25) & 0x1) +#define C_028810_VTE_VPORT_PROVOKE_DISABLE 0xFDFFFFFF +#define S_028810_ZCLIP_NEAR_DISABLE(x) (((x) & 0x1) << 26) +#define G_028810_ZCLIP_NEAR_DISABLE(x) (((x) >> 26) & 0x1) +#define C_028810_ZCLIP_NEAR_DISABLE 0xFBFFFFFF +#define S_028810_ZCLIP_FAR_DISABLE(x) (((x) & 0x1) << 27) +#define G_028810_ZCLIP_FAR_DISABLE(x) (((x) >> 27) & 0x1) +#define C_028810_ZCLIP_FAR_DISABLE 0xF7FFFFFF #define R_028010_DB_DEPTH_INFO 0x028010 #define S_028010_FORMAT(x) (((x) & 0x7) << 0) #define G_028010_FORMAT(x) (((x) >> 0) & 0x7) -- cgit v1.2.3 From a838cee6bc3a2e144c00f0a5f0a7791cd97037ab Mon Sep 17 00:00:00 2001 From: Maarten Maathuis Date: Fri, 6 Aug 2010 23:56:31 +0200 Subject: nouveau: fix potential NULL-ptr dereference in nouveau_stateobj.h - This can only be triggered when DEBUG_NOUVEAU_STATEOBJ is active. - Also remove a redundant pointer assignment. 
Reported-by: Roy Spliet Signed-off-by: Maarten Maathuis --- src/gallium/drivers/nouveau/nouveau_stateobj.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h index f5c1c5ca2c..e920cf9f3b 100644 --- a/src/gallium/drivers/nouveau/nouveau_stateobj.h +++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h @@ -151,9 +151,9 @@ so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr, if (so->start_alloc <= so->cur_start) { debug_printf("exceeding num_start size\n"); assert(0); - } else + } #endif /* DEBUG_NOUVEAU_STATEOBJ */ - start = so->start; + start = so->start; #ifdef DEBUG_NOUVEAU_STATEOBJ if (so->cur_start > 0 && start[so->cur_start - 1].size > so->cur) { @@ -162,7 +162,6 @@ so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr, } #endif /* DEBUG_NOUVEAU_STATEOBJ */ - so->start = start; start[so->cur_start].gr = gr; start[so->cur_start].mthd = mthd; start[so->cur_start].size = size; @@ -193,11 +192,10 @@ so_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo, if (so->reloc_alloc <= so->cur_reloc) { debug_printf("exceeding num_reloc size\n"); assert(0); - } else + } #endif /* DEBUG_NOUVEAU_STATEOBJ */ - r = so->reloc; + r = so->reloc; - so->reloc = r; r[so->cur_reloc].bo = NULL; nouveau_bo_ref(bo, &(r[so->cur_reloc].bo)); r[so->cur_reloc].gr = so->start[so->cur_start-1].gr; -- cgit v1.2.3 From ab0a3f30b8070a0204c023f8ea5375f1d3f710fd Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 7 Aug 2010 01:59:31 +0200 Subject: r300g: fix cbzb clears when hyperz is off --- src/gallium/drivers/r300/r300_flush.c | 3 +-- src/gallium/drivers/r300/r300_hyperz.c | 9 +++++++-- src/gallium/drivers/r300/r300_render.c | 3 +-- src/gallium/drivers/r300/r300_state.c | 3 +-- src/gallium/drivers/r300/r300_state_derived.c | 3 +-- 5 files changed, 11 insertions(+), 10 deletions(-) (limited to 'src/gallium') diff --git 
a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 7fed9b5d07..fe182b6615 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -44,8 +44,7 @@ static void r300_flush(struct pipe_context* pipe, u_upload_flush(r300->upload_ib); if (r300->dirty_hw) { - if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) - r300_emit_hyperz_end(r300); + r300_emit_hyperz_end(r300); r300_emit_query_end(r300); r300->flush_counter++; diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index 10e440ce30..523d547ea9 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -21,12 +21,14 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "util/u_format.h" -#include "util/u_mm.h" #include "r300_context.h" #include "r300_hyperz.h" #include "r300_reg.h" #include "r300_fs.h" +#include "r300_winsys.h" + +#include "util/u_format.h" +#include "util/u_mm.h" /* HiZ rules - taken from various docs @@ -138,6 +140,9 @@ static void r300_update_hyperz(struct r300_context* r300) return; } + if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + return; + /* Zbuffer compression. */ if (r300->z_compression) { z->zb_bw_cntl |= R300_RD_COMP_ENABLE; diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 910f5f7113..f2ff65b261 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -223,8 +223,7 @@ static void r300_prepare_for_rendering(struct r300_context *r300, /* Emitted in flush. 
*/ end_dwords += 26; /* emit_query_end */ - if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) - end_dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */ + end_dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */ cs_dwords += end_dwords; diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 1e6b81d798..9db5e9e054 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -689,8 +689,7 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, /* What is marked as dirty depends on the enum r300_fb_state_change. */ r300->gpu_flush.dirty = TRUE; r300->fb_state.dirty = TRUE; - if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) - r300->hyperz_state.dirty = TRUE; + r300->hyperz_state.dirty = TRUE; if (change == R300_CHANGED_FB_STATE) { r300->aa_state.dirty = TRUE; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index f3dad4c292..a85b46f5c7 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -694,6 +694,5 @@ void r300_update_derived_state(struct r300_context* r300) } } - if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) - r300_update_hyperz_state(r300); + r300_update_hyperz_state(r300); } -- cgit v1.2.3 From b55f6279370333574aa11c289f2ad9715cdca24d Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 8 Aug 2010 19:12:14 +1000 Subject: r300g: take hiz/zmask offsets into a/c when clearing. Need to add a test for multi-hiz/zmask db in a single context. 
Signed-off-by: Dave Airlie --- src/gallium/drivers/r300/r300_emit.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 17e180a79a..0c40e2db93 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -1008,6 +1008,8 @@ void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) int i; tex = r300_texture(fb->zsbuf->texture); + + offset = tex->hiz_mem[fb->zsbuf->level]->ofs; stride = tex->desc.stride_in_pixels[fb->zsbuf->level]; /* convert from pixels to 4x4 blocks */ @@ -1043,6 +1045,8 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state tex = r300_texture(fb->zsbuf->texture); stride = tex->desc.stride_in_pixels[fb->zsbuf->level]; + offset = tex->zmask_mem[fb->zsbuf->level]->ofs; + if (r300->z_compression == RV350_Z_COMPRESS_88) mult = 8; else -- cgit v1.2.3 From 445e59057f69131fc0f1585f22c5f281d1d0f4a3 Mon Sep 17 00:00:00 2001 From: nobled Date: Fri, 6 Aug 2010 17:36:41 +0000 Subject: draw: Use the correct type for integers Two integers were being operated on as a vector of floats in draw_llvm_generate(). 
This bug got uncovered by fixing this bug: http://bugs.freedesktop.org/29407 --- src/gallium/auxiliary/draw/draw_llvm.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index de99b00a81..993e1101d7 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -683,7 +683,6 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) unsigned i, j; struct lp_build_context bld; struct lp_build_loop_state lp_loop; - struct lp_type vs_type = lp_type_float_vec(32); const int max_vertices = 4; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; void *code; @@ -732,7 +731,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); - lp_build_context_init(&bld, builder, vs_type); + lp_build_context_init(&bld, builder, lp_type_int(32)); end = lp_build_add(&bld, start, count); @@ -847,7 +846,6 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian struct lp_build_context bld; struct lp_build_context bld_int; struct lp_build_loop_state lp_loop; - struct lp_type vs_type = lp_type_float_vec(32); const int max_vertices = 4; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; LLVMValueRef fetch_max; @@ -899,7 +897,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); - lp_build_context_init(&bld, builder, vs_type); + lp_build_context_init(&bld, builder, lp_type_float_vec(32)); lp_build_context_init(&bld_int, builder, lp_type_int(32)); step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); -- cgit v1.2.3 From b9a21fd6ca036763500e72ce5783867fdfb340e5 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 8 Aug 2010 13:58:23 +0100 Subject: draw: Remove 
unused variable. --- src/gallium/auxiliary/draw/draw_llvm.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 993e1101d7..8d53601d19 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -844,7 +844,6 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian struct draw_context *draw = llvm->draw; unsigned i, j; struct lp_build_context bld; - struct lp_build_context bld_int; struct lp_build_loop_state lp_loop; const int max_vertices = 4; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; @@ -897,8 +896,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); - lp_build_context_init(&bld, builder, lp_type_float_vec(32)); - lp_build_context_init(&bld_int, builder, lp_type_int(32)); + lp_build_context_init(&bld, builder, lp_type_int(32)); step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); @@ -933,7 +931,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian /* make sure we're not out of bounds which can happen * if fetch_count % 4 != 0, because on the last iteration * a few of the 4 vertex fetches will be out of bounds */ - true_index = lp_build_min(&bld_int, true_index, fetch_max); + true_index = lp_build_min(&bld, true_index, fetch_max); fetch_ptr = LLVMBuildGEP(builder, fetch_elts, &true_index, 1, ""); -- cgit v1.2.3 From d8279728165eec2da6031cf543820acad322d192 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 8 Aug 2010 21:02:59 +0100 Subject: gallivm: Add type checks for the basic operations. 
--- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index f5f2623e46..98e8e4916d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -190,6 +190,9 @@ lp_build_add(struct lp_build_context *bld, const struct lp_type type = bld->type; LLVMValueRef res; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + if(a == bld->zero) return b; if(b == bld->zero) @@ -273,6 +276,9 @@ lp_build_sub(struct lp_build_context *bld, const struct lp_type type = bld->type; LLVMValueRef res; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + if(b == bld->zero) return a; if(a == bld->undef || b == bld->undef) @@ -395,6 +401,9 @@ lp_build_mul(struct lp_build_context *bld, LLVMValueRef shift; LLVMValueRef res; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + if(a == bld->zero) return bld->zero; if(a == bld->one) @@ -518,6 +527,9 @@ lp_build_div(struct lp_build_context *bld, { const struct lp_type type = bld->type; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + if(a == bld->zero) return bld->zero; if(a == bld->one) -- cgit v1.2.3 From 757c78afe7cca6a05c88c0c203fa5f4488ebd0ed Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 7 Aug 2010 02:05:03 +0200 Subject: r300g: generalize the way we ask for hyperz This makes it compatible with the modified DRM interface in drm-radeon-testing. Also, now you need to set RADEON_HYPERZ=1 to be able to use hyperz. It's not bug-free yet. 
--- src/gallium/winsys/radeon/drm/radeon_drm.c | 35 ++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 9 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/winsys/radeon/drm/radeon_drm.c b/src/gallium/winsys/radeon/drm/radeon_drm.c index ecaf096dea..593741b86c 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm.c @@ -55,6 +55,31 @@ radeon_winsys_create(int fd) return rws; } +/* Enable/disable Hyper-Z access. Return TRUE on success. */ +static boolean radeon_set_hyperz_access(int fd, boolean enable) +{ +#ifndef RADEON_INFO_WANT_HYPERZ +#define RADEON_INFO_WANT_HYPERZ 7 +#endif + + struct drm_radeon_info info = {0}; + unsigned value = enable ? 1 : 0; + + if (!debug_get_bool_option("RADEON_HYPERZ", FALSE)) + return FALSE; + + info.value = (unsigned long)&value; + info.request = RADEON_INFO_WANT_HYPERZ; + + if (drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)) != 0) + return FALSE; + + if (enable && !value) + return FALSE; + + return TRUE; +} + /* Helper function to do the ioctls needed for setup and init. 
*/ static void do_ioctls(int fd, struct radeon_libdrm_winsys* winsys) { @@ -134,15 +159,7 @@ static void do_ioctls(int fd, struct radeon_libdrm_winsys* winsys) } winsys->z_pipes = target; - winsys->hyperz = FALSE; -#ifndef RADEON_INFO_WANT_HYPERZ -#define RADEON_INFO_WANT_HYPERZ 7 -#endif - info.request = RADEON_INFO_WANT_HYPERZ; - retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); - if (!retval && target == 1) { - winsys->hyperz = TRUE; - } + winsys->hyperz = radeon_set_hyperz_access(fd, TRUE); retval = drmCommandWriteRead(fd, DRM_RADEON_GEM_INFO, &gem_info, sizeof(gem_info)); -- cgit v1.2.3 From 363b74f132a5a329fac25322f3c5c227c55b94a8 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 8 Aug 2010 18:21:53 +0200 Subject: r300g: do not allocate a zmask block for 3D textures and cubemaps --- src/gallium/drivers/r300/r300_hyperz.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index 523d547ea9..3b0adc3584 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -338,6 +338,12 @@ void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf tex = r300_texture(surf->base.texture); + /* We currently don't handle decompression for 3D textures and cubemaps + * correctly. */ + if (tex->desc.b.b.target != PIPE_TEXTURE_1D && + tex->desc.b.b.target != PIPE_TEXTURE_2D) + return; + if (tex->zmask_mem[level]) return; -- cgit v1.2.3 From 4f5e51068bce4e32a9561b4b4d6f3feca33642bf Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 8 Aug 2010 18:43:42 +0200 Subject: r300g: flush zmasks of zbuffers we are going to use as samplers It sometimes works, sometimes not. I guess we have the zmask offsets wrong. 
--- src/gallium/drivers/r300/r300_blit.c | 19 ++++++++------ src/gallium/drivers/r300/r300_context.h | 7 ++++++ src/gallium/drivers/r300/r300_emit.c | 3 +++ src/gallium/drivers/r300/r300_state_derived.c | 36 +++++++++++++++++++++++++++ 4 files changed, 58 insertions(+), 7 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 6f8d9abfc8..18d00d61f4 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -259,27 +259,32 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe, r300_blitter_end(r300); } -/* Clear a region of a depth stencil surface. */ -static void r300_flush_depth_stencil(struct pipe_context *pipe, - struct pipe_resource *dst, - struct pipe_subresource subdst) +/* Flush a depth stencil buffer. */ +void r300_flush_depth_stencil(struct pipe_context *pipe, + struct pipe_resource *dst, + struct pipe_subresource subdst, + unsigned zslice) { struct r300_context *r300 = r300_context(pipe); struct pipe_surface *dstsurf; struct r300_texture *tex = r300_texture(dst); - /* only flush the zmask if we have one attached to this texture */ if (!tex->zmask_mem[subdst.level]) return; + if (!tex->dirty_zmask[subdst.level]) + return; dstsurf = pipe->screen->get_tex_surface(pipe->screen, dst, - subdst.face, subdst.level, 0, + subdst.face, subdst.level, zslice, PIPE_BIND_DEPTH_STENCIL); r300->z_decomp_rd = TRUE; r300_blitter_begin(r300, R300_CLEAR_SURFACE); util_blitter_flush_depth_stencil(r300->blitter, dstsurf); r300_blitter_end(r300); r300->z_decomp_rd = FALSE; + + tex->dirty_zmask[subdst.level] = FALSE; + pipe->flush(pipe, 0, NULL); } /* Copy a block of pixels from one surface to another using HW. 
*/ @@ -342,7 +347,7 @@ static void r300_resource_copy_region(struct pipe_context *pipe, is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; if (is_depth) { - r300_flush_depth_stencil(pipe, src, subsrc); + r300_flush_depth_stencil(pipe, src, subsrc, srcz); } if (old_format != new_format) { dst->format = new_format; diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index d86a5c8fc9..8b772f3887 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -397,6 +397,7 @@ struct r300_texture { /* hyper-z memory allocs */ struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS]; struct mem_block *zmask_mem[R300_MAX_TEXTURE_LEVELS]; + boolean dirty_zmask[R300_MAX_TEXTURE_LEVELS]; /* This is the level tiling flags were last time set for. * It's used to prevent redundant tiling-flags changes from happening.*/ @@ -628,6 +629,12 @@ void r300_init_render_functions(struct r300_context *r300); void r300_init_state_functions(struct r300_context* r300); void r300_init_resource_functions(struct r300_context* r300); +/* r300_blit.c */ +void r300_flush_depth_stencil(struct pipe_context *pipe, + struct pipe_resource *dst, + struct pipe_subresource subdst, + unsigned zslice); + /* r300_query.c */ void r300_resume_query(struct r300_context *r300, struct r300_query *query); diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 0c40e2db93..c35774c149 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -1069,6 +1069,9 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state offset <<= offset_shift; r300_emit_zmask_line_clear(r300, offset, stride, 0x0);//0xffffffff); } + + /* Mark the current zbuffer's zmask as dirty. 
*/ + tex->dirty_zmask[fb->zsbuf->level] = TRUE; } void r300_emit_ztop_state(struct r300_context* r300, diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index a85b46f5c7..693b1e29f2 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -677,8 +677,44 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) } } +/* We can't use compressed zbuffers as samplers. */ +static void r300_flush_depth_textures(struct r300_context *r300) +{ + struct r300_textures_state *state = + (struct r300_textures_state*)r300->textures_state.state; + unsigned i, level; + unsigned count = MIN2(state->sampler_view_count, + state->sampler_state_count); + + if (r300->z_decomp_rd) + return; + + for (i = 0; i < count; i++) + if (state->sampler_views[i] && state->sampler_states[i]) { + struct pipe_resource *tex = state->sampler_views[i]->base.texture; + + if (tex->target == PIPE_TEXTURE_3D || + tex->target == PIPE_TEXTURE_CUBE) + continue; + + /* Ignore non-depth textures. + * Also ignore reinterpreted depth textures, e.g. resource_copy. */ + if (!util_format_is_depth_or_stencil(tex->format)) + continue; + + for (level = 0; level <= tex->last_level; level++) + if (r300_texture(tex)->dirty_zmask[level]) { + /* We don't handle 3D textures and cubemaps yet. 
*/ + r300_flush_depth_stencil(&r300->context, tex, + u_subresource(0, level), 0); + } + } +} + void r300_update_derived_state(struct r300_context* r300) { + r300_flush_depth_textures(r300); + if (r300->textures_state.dirty) { r300_merge_textures_and_samplers(r300); } -- cgit v1.2.3 From cd5af8c703d84dd856528554fa615e9787ebe75f Mon Sep 17 00:00:00 2001 From: nobled Date: Sun, 8 Aug 2010 20:17:30 +0000 Subject: gallivm: Use the correct context for integers See: http://bugs.freedesktop.org/29407 --- src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 21236839fb..048b29929a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -533,7 +533,7 @@ emit_fetch( reg->Register.Index * 4 + swizzle); /* index_vec = index_vec + addr_vec */ - index_vec = lp_build_add(&bld->base, index_vec, addr_vec); + index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec); /* Gather values from the constant buffer */ res = build_gather(bld, bld->consts_ptr, index_vec); -- cgit v1.2.3 From 12f5c0f9ce497e99854e0a3a7f5ff297a2a0a1e3 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 8 Aug 2010 22:18:53 +0100 Subject: gallivm: Fix more integer operations. 
--- src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 048b29929a..42d796cb95 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -489,7 +489,7 @@ get_indirect_offsets(struct lp_build_tgsi_soa_context *bld, int_vec_type, ""); /* addr_vec = addr_vec * 4 */ - addr_vec = lp_build_mul(&bld->base, addr_vec, vec4); + addr_vec = lp_build_mul(&bld->int_bld, addr_vec, vec4); return addr_vec; } @@ -773,7 +773,9 @@ emit_store( addr = LLVMBuildExtractElement(bld->base.builder, addr, LLVMConstInt(LLVMInt32Type(), 0, 0), ""); - addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0)); + addr = LLVMBuildMul(bld->base.builder, + addr, LLVMConstInt(LLVMInt32Type(), 4, 0), + ""); } switch( reg->Register.File ) { -- cgit v1.2.3 From aef0fbd5b6e0b29342d09722c98d512b3661c31b Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 8 Aug 2010 23:09:46 +0200 Subject: r300g: remove a flush Ooops, it wasn't supposed to be there. --- src/gallium/drivers/r300/r300_blit.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 18d00d61f4..ff52286b5c 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -284,7 +284,6 @@ void r300_flush_depth_stencil(struct pipe_context *pipe, r300->z_decomp_rd = FALSE; tex->dirty_zmask[subdst.level] = FALSE; - pipe->flush(pipe, 0, NULL); } /* Copy a block of pixels from one surface to another using HW. 
*/ -- cgit v1.2.3 From 3bb0719fe1514d2a8fd4674203882fdb08232172 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Mon, 9 Aug 2010 04:56:03 +0200 Subject: nouveau: fix maps with PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_DONTBLOCK In this case, we were incorrectly prioritizing PIPE_TRANSFER_DONTBLOCK over PIPE_TRANSFER_UNSYNCHRONIZED. This can lead to failure in the Mesa VBO draw paths that end up specifying both, but don't expect map to fail (in particular, the problem manifested as a leak of buffer objects in teapot with other changes). --- src/gallium/drivers/nouveau/nouveau_winsys.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index df79ca89ca..c6c93d40b8 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -24,11 +24,10 @@ nouveau_screen_transfer_flags(unsigned pipe) flags |= NOUVEAU_BO_WR; if (pipe & PIPE_TRANSFER_DISCARD) flags |= NOUVEAU_BO_INVAL; - if (pipe & PIPE_TRANSFER_DONTBLOCK) - flags |= NOUVEAU_BO_NOWAIT; - else if (pipe & PIPE_TRANSFER_UNSYNCHRONIZED) flags |= NOUVEAU_BO_NOSYNC; + else if (pipe & PIPE_TRANSFER_DONTBLOCK) + flags |= NOUVEAU_BO_NOWAIT; return flags; } -- cgit v1.2.3 From 771ad674142001edba1802f82c89c1071cf72ca2 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 9 Aug 2010 19:56:45 +1000 Subject: r300g: fix hiz/zmask offset emissions. ofs is in dwords, so need to shift it for registers. 
Signed-off-by: Dave Airlie --- src/gallium/drivers/r300/r300_emit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index c35774c149..7bd43b6eb5 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -393,7 +393,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) /* HiZ RAM. */ if (r300->screen->caps.hiz_ram) { if (tex->hiz_mem[level]) { - OUT_CS_REG(R300_ZB_HIZ_OFFSET, tex->hiz_mem[level]->ofs); + OUT_CS_REG(R300_ZB_HIZ_OFFSET, tex->hiz_mem[level]->ofs << 2); OUT_CS_REG(R300_ZB_HIZ_PITCH, surf_pitch); } else { OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); @@ -402,7 +402,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) } /* Z Mask RAM. (compressed zbuffer) */ if (tex->zmask_mem[level]) { - OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs); + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs << 2); OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch); } else { OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); -- cgit v1.2.3 From 00963589b4d92460e3ae2c1557a5d816b5c67a6d Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 9 Aug 2010 14:53:58 +0200 Subject: r600g: fill out some missing caps and sort them The shader caps need additional corrections. 
(based on a patch from netkas at Phoronix) --- src/gallium/drivers/r600/r600_screen.c | 97 +++++++++++++++++++++++----------- 1 file changed, 65 insertions(+), 32 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_screen.c b/src/gallium/drivers/r600/r600_screen.c index 4b87327a7c..6ec842d591 100644 --- a/src/gallium/drivers/r600/r600_screen.c +++ b/src/gallium/drivers/r600/r600_screen.c @@ -53,59 +53,92 @@ static const char* r600_get_name(struct pipe_screen* pscreen) static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) { switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return 16; + /* Supported features (boolean caps). */ case PIPE_CAP_NPOT_TEXTURES: - return 1; case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; case PIPE_CAP_GLSL: - return 1; case PIPE_CAP_DUAL_SOURCE_BLEND: - return 1; case PIPE_CAP_ANISOTROPIC_FILTER: - return 1; case PIPE_CAP_POINT_SPRITE: - return 1; - case PIPE_CAP_MAX_RENDER_TARGETS: - /* FIXME some r6xx are buggy and can only do 4 */ - return 8; case PIPE_CAP_OCCLUSION_QUERY: - return 1; case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - /* FIXME not sure here */ - return 13; case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - return 1; case PIPE_CAP_TEXTURE_MIRROR_REPEAT: - return 1; - case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - /* FIXME allow this once infrastructure is there */ - return 0; - case PIPE_CAP_TGSI_CONT_SUPPORTED: - return 0; case PIPE_CAP_BLEND_EQUATION_SEPARATE: - return 1; case PIPE_CAP_SM3: - return 1; + case PIPE_CAP_TEXTURE_SWIZZLE: case PIPE_CAP_INDEP_BLEND_ENABLE: - return 1; - case PIPE_CAP_INDEP_BLEND_FUNC: - /* FIXME allow this */ - return 0; case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: return 1; + + /* Unsupported features (boolean caps). 
*/ + case PIPE_CAP_TIMER_QUERY: + case PIPE_CAP_TGSI_CONT_SUPPORTED: + case PIPE_CAP_STREAM_OUTPUT: + case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */ + case PIPE_CAP_GEOMETRY_SHADER4: + case PIPE_CAP_DEPTH_CLAMP: /* FIXME allow this */ + return 0; + + /* Texturing. */ + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 14; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + /* FIXME allow this once infrastructure is there */ + return 0; + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return 16; + + /* Render targets. */ + case PIPE_CAP_MAX_RENDER_TARGETS: + /* FIXME some r6xx are buggy and can only do 4 */ + return 8; + + /* Fragment coordinate conventions. */ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: return 1; case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: return 0; + + /* Shader limits. */ + case PIPE_CAP_MAX_VS_INSTRUCTIONS: + case PIPE_CAP_MAX_VS_ALU_INSTRUCTIONS: + case PIPE_CAP_MAX_VS_TEX_INSTRUCTIONS: + case PIPE_CAP_MAX_VS_TEX_INDIRECTIONS: + case PIPE_CAP_MAX_FS_INSTRUCTIONS: + case PIPE_CAP_MAX_FS_ALU_INSTRUCTIONS: + case PIPE_CAP_MAX_FS_TEX_INSTRUCTIONS: + case PIPE_CAP_MAX_FS_TEX_INDIRECTIONS: + return 8192; + case PIPE_CAP_MAX_VS_CONTROL_FLOW_DEPTH: + case PIPE_CAP_MAX_FS_CONTROL_FLOW_DEPTH: + return 8; /* FIXME */ + case PIPE_CAP_MAX_VS_INPUTS: + case PIPE_CAP_MAX_FS_INPUTS: + return 32; + case PIPE_CAP_MAX_VS_TEMPS: + case PIPE_CAP_MAX_FS_TEMPS: + return 128; + case PIPE_CAP_MAX_VS_ADDRS: + case PIPE_CAP_MAX_FS_ADDRS: + return 1; /* FIXME Isn't this equal to TEMPS? */ + case PIPE_CAP_MAX_VS_CONSTS: + case PIPE_CAP_MAX_FS_CONSTS: + return 256; /* FIXME I believe this should be much higher. 
*/ + case PIPE_CAP_MAX_CONST_BUFFERS: + return 1; + case PIPE_CAP_MAX_CONST_BUFFER_SIZE: /* in bytes */ + return 4096; + case PIPE_CAP_MAX_PREDICATE_REGISTERS: + case PIPE_CAP_MAX_VS_PREDS: + case PIPE_CAP_MAX_FS_PREDS: + return 0; /* FIXME */ + default: R600_ERR("r600: unknown param %d\n", param); return 0; -- cgit v1.2.3 From 2cad5350f9691d4d2c18a637548735925fa0ee97 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 9 Aug 2010 14:57:56 +0200 Subject: r600g: fix some warnings --- src/gallium/drivers/r600/r600_asm.c | 4 ++-- src/gallium/drivers/r600/r600_draw.c | 2 +- src/gallium/drivers/r600/r600_screen.h | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index f1dc3dc3a9..16c98504ad 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -287,7 +287,7 @@ static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsign return 0; } -int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) +static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) { unsigned i; @@ -331,7 +331,7 @@ int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) return 0; } -int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) +static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) { unsigned id = cf->id; diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c index 2420b76318..f058455162 100644 --- a/src/gallium/drivers/r600/r600_draw.c +++ b/src/gallium/drivers/r600/r600_draw.c @@ -127,7 +127,7 @@ static int r600_draw_common(struct r600_draw *draw) draw->draw->states[R600_DRAW__VGT_NUM_INDICES] = draw->count; draw->draw->states[R600_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator; if (draw->index_buffer) { - rbuffer = (struct r600_buffer*)draw->index_buffer; + rbuffer = (struct 
r600_resource*)draw->index_buffer; draw->draw->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); draw->draw->placement[0] = RADEON_GEM_DOMAIN_GTT; draw->draw->placement[1] = RADEON_GEM_DOMAIN_GTT; diff --git a/src/gallium/drivers/r600/r600_screen.h b/src/gallium/drivers/r600/r600_screen.h index 9a452ecfe3..53b560c617 100644 --- a/src/gallium/drivers/r600/r600_screen.h +++ b/src/gallium/drivers/r600/r600_screen.h @@ -80,4 +80,6 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx, int r600_conv_pipe_format(unsigned pformat, unsigned *format); int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); +void r600_init_screen_texture_functions(struct pipe_screen *screen); + #endif -- cgit v1.2.3 From 29b7d26401a77ee4f772233b003d942bba59bb63 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 9 Aug 2010 17:15:37 +0200 Subject: r300g: do not print shader compiler errors by default --- src/gallium/drivers/r300/r300_fs.c | 4 ++-- src/gallium/drivers/r300/r300_vs.c | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 87ff49a90c..5c905c1159 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -383,7 +383,7 @@ static void r300_translate_fragment_shader( find_output_registers(&compiler, shader); if (compiler.Base.Debug) { - debug_printf("r300: Initial fragment program\n"); + DBG(r300, DBG_FP, "r300: Initial fragment program\n"); tgsi_dump(tokens, 0); } @@ -418,7 +418,7 @@ static void r300_translate_fragment_shader( } if (compiler.Base.Error) { - fprintf(stderr, "r300 FP: Compiler Error:\n%sUsing a dummy shader" + DBG(r300, DBG_FP, "r300 FP: Compiler Error:\n%sUsing a dummy shader" " instead.\nIf there's an 'unknown opcode' message, please" " file a bug report and attach this log.\n", compiler.Base.ErrorMsg); diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 
b25c786d6b..54c8de1241 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -207,7 +207,7 @@ void r300_translate_vertex_shader(struct r300_context *r300, compiler.Base.max_temp_regs = 32; if (compiler.Base.Debug) { - debug_printf("r300: Initial vertex program\n"); + DBG(r300, DBG_VP, "r300: Initial vertex program\n"); tgsi_dump(vs->state.tokens, 0); } @@ -227,8 +227,7 @@ void r300_translate_vertex_shader(struct r300_context *r300, /* Invoke the compiler */ r3xx_compile_vertex_program(&compiler); if (compiler.Base.Error) { - /* XXX We should fallback using Draw. */ - fprintf(stderr, "r300 VP: Compiler error:\n%sUsing a dummy shader" + DBG(r300, DBG_VP, "r300 VP: Compiler error:\n%sUsing a dummy shader" " instead.\nIf there's an 'unknown opcode' message, please" " file a bug report and attach this log.\n", compiler.Base.ErrorMsg); -- cgit v1.2.3 From 95fb0bf58dde0b81ce601d3f0477fd1b2a5a28d4 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Mon, 9 Aug 2010 11:32:45 -0400 Subject: r600g: fix r600 context structure, avoid segfault when no scissor Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_context.h | 2 +- src/gallium/drivers/r600/r600_state.c | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index 431f8951b2..c606dbbda3 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -94,7 +94,7 @@ struct r600_context_hw_states { struct radeon_state *dsa; struct radeon_state *blend; struct radeon_state *viewport; - struct radeon_state *cb[7]; + struct radeon_state *cb[8]; struct radeon_state *config; struct radeon_state *cb_cntl; struct radeon_state *db; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index ff621084d4..cad5185e32 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ 
b/src/gallium/drivers/r600/r600_state.c @@ -836,12 +836,25 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) static struct radeon_state *r600_scissor(struct r600_context *rctx) { const struct pipe_scissor_state *state = &rctx->scissor->state.scissor; + const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; struct r600_screen *rscreen = rctx->screen; struct radeon_state *rstate; + unsigned minx, maxx, miny, maxy; u32 tl, br; - tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) | S_028240_WINDOW_OFFSET_DISABLE(1); - br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); + if (state == NULL) { + minx = 0; + miny = 0; + maxx = fb->cbufs[0]->width; + maxy = fb->cbufs[0]->height; + } else { + minx = state->minx; + miny = state->miny; + maxx = state->maxx; + maxy = state->maxy; + } + tl = S_028240_TL_X(minx) | S_028240_TL_Y(miny) | S_028240_WINDOW_OFFSET_DISABLE(1); + br = S_028244_BR_X(maxx) | S_028244_BR_Y(maxy); rstate = radeon_state(rscreen->rw, R600_SCISSOR_TYPE, R600_SCISSOR); if (rstate == NULL) return NULL; -- cgit v1.2.3 From 65b9747a54490dd56cd5cee4c2c1b9f51d35f133 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 9 Aug 2010 16:41:44 +0100 Subject: util: Move _mm_shuffle_epi8() to u_sse.h. It's bound to be useful elsewhere. 
--- src/gallium/auxiliary/util/u_sse.h | 29 +++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_tile_soa.py | 29 +---------------------------- 2 files changed, 30 insertions(+), 28 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h index 6145e34aa3..87959ab0aa 100644 --- a/src/gallium/auxiliary/util/u_sse.h +++ b/src/gallium/auxiliary/util/u_sse.h @@ -71,6 +71,35 @@ _mm_castps_si128(__m128 a) #endif /* defined(_MSC_VER) && _MSC_VER < 1500 */ + +#if defined(PIPE_ARCH_SSSE3) + +#include <tmmintrin.h> + +#else /* !PIPE_ARCH_SSSE3 */ + +#include <emmintrin.h> + +/** + * Describe _mm_shuffle_epi8() with gcc extended inline assembly, for cases + * where -mssse3 is not supported/enabled. + * + * MSVC will never get in here as its intrinsics support do not rely on + * compiler command line options. + */ +static __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shuffle_epi8(__m128i a, __m128i mask) +{ + __m128i result; + __asm__("pshufb %1, %0" + : "=x" (result) + : "xm" (mask), "0" (a)); + return result; +} + +#endif /* !PIPE_ARCH_SSSE3 */ + + #endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */ #endif /* U_SSE_H_ */ diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index c71ec8066c..2ba39052ab 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -293,34 +293,7 @@ def generate_ssse3(): print ''' #if defined(PIPE_ARCH_SSE) - -#if defined(PIPE_ARCH_SSSE3) - -#include <tmmintrin.h> - -#else - -#include <emmintrin.h> - -/** - * Describe _mm_shuffle_epi8() with gcc extended inline assembly, for cases - * where -mssse3 is not supported/enabled. - * - * MSVC will never get in here as its intrinsics support do not rely on - * compiler command line options.
- */ -static __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_shuffle_epi8(__m128i a, __m128i mask) -{ - __m128i result; - __asm__("pshufb %1, %0" - : "=x" (result) - : "xm" (mask), "0" (a)); - return result; -} - -#endif - +#include "util/u_sse.h" static void lp_tile_b8g8r8a8_unorm_swizzle_4ub_ssse3(uint8_t *dst, -- cgit v1.2.3 From fc9a49b638c26801951c33a570178bbb2b67ec60 Mon Sep 17 00:00:00 2001 From: nobled Date: Sun, 8 Aug 2010 19:44:54 +0000 Subject: gallivm: Always use floating-point operators for floating-point types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the assert added in LLVM 2.8: assert(getType()->isIntOrIntVectorTy() && "Tried to create an integer operation on a non-integer type!") But it also fixes some subtle bugs, since we should've been doing this since LLVM 2.6 anyway. Includes a modified patch from steckdenis@yahoo.fr for the FNeg instructions in emit_fetch(); thanks for pointing those out. 
http://bugs.freedesktop.org/29404 http://bugs.freedesktop.org/29407 Signed-off-by: José Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 181 +++++++++++++--------- src/gallium/auxiliary/gallivm/lp_bld_conv.c | 14 +- src/gallium/auxiliary/gallivm/lp_bld_format_aos.c | 4 +- src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 4 +- src/gallium/auxiliary/gallivm/lp_bld_quad.c | 4 +- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 12 +- src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 10 +- 7 files changed, 137 insertions(+), 92 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 98e8e4916d..d2dde41e9f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -173,9 +173,15 @@ lp_build_comp(struct lp_build_context *bld, } if(LLVMIsConstant(a)) - return LLVMConstSub(bld->one, a); + if (type.floating) + return LLVMConstFSub(bld->one, a); + else + return LLVMConstSub(bld->one, a); else - return LLVMBuildSub(bld->builder, bld->one, a, ""); + if (type.floating) + return LLVMBuildFSub(bld->builder, bld->one, a, ""); + else + return LLVMBuildSub(bld->builder, bld->one, a, ""); } @@ -220,9 +226,15 @@ lp_build_add(struct lp_build_context *bld, } if(LLVMIsConstant(a) && LLVMIsConstant(b)) - res = LLVMConstAdd(a, b); + if (type.floating) + res = LLVMConstFAdd(a, b); + else + res = LLVMConstAdd(a, b); else - res = LLVMBuildAdd(bld->builder, a, b, ""); + if (type.floating) + res = LLVMBuildFAdd(bld->builder, a, b, ""); + else + res = LLVMBuildAdd(bld->builder, a, b, ""); /* clamp to ceiling of 1.0 */ if(bld->type.norm && (bld->type.floating || bld->type.fixed)) @@ -256,9 +268,16 @@ lp_build_sum_vector(struct lp_build_context *bld, for (i = 1; i < type.length; i++) { index = LLVMConstInt(LLVMInt32Type(), i, 0); - res = LLVMBuildAdd(bld->builder, res, - LLVMBuildExtractElement(bld->builder, a, index, ""), - ""); + if 
(type.floating) + res = LLVMBuildFAdd(bld->builder, res, + LLVMBuildExtractElement(bld->builder, + a, index, ""), + ""); + else + res = LLVMBuildAdd(bld->builder, res, + LLVMBuildExtractElement(bld->builder, + a, index, ""), + ""); } return res; @@ -306,9 +325,15 @@ lp_build_sub(struct lp_build_context *bld, } if(LLVMIsConstant(a) && LLVMIsConstant(b)) - res = LLVMConstSub(a, b); + if (type.floating) + res = LLVMConstFSub(a, b); + else + res = LLVMConstSub(a, b); else - res = LLVMBuildSub(bld->builder, a, b, ""); + if (type.floating) + res = LLVMBuildFSub(bld->builder, a, b, ""); + else + res = LLVMBuildSub(bld->builder, a, b, ""); if(bld->type.norm && (bld->type.floating || bld->type.fixed)) res = lp_build_max_simple(bld, res, bld->zero); @@ -442,7 +467,10 @@ lp_build_mul(struct lp_build_context *bld, shift = NULL; if(LLVMIsConstant(a) && LLVMIsConstant(b)) { - res = LLVMConstMul(a, b); + if (type.floating) + res = LLVMConstFMul(a, b); + else + res = LLVMConstMul(a, b); if(shift) { if(type.sign) res = LLVMConstAShr(res, shift); @@ -451,7 +479,10 @@ lp_build_mul(struct lp_build_context *bld, } } else { - res = LLVMBuildMul(bld->builder, a, b, ""); + if (type.floating) + res = LLVMBuildFMul(bld->builder, a, b, ""); + else + res = LLVMBuildMul(bld->builder, a, b, ""); if(shift) { if(type.sign) res = LLVMBuildAShr(bld->builder, res, shift, ""); @@ -481,7 +512,10 @@ lp_build_mul_imm(struct lp_build_context *bld, return a; if(b == -1) - return LLVMBuildNeg(bld->builder, a, ""); + if (bld->type.floating) + return LLVMBuildFNeg(bld->builder, a, ""); + else + return LLVMBuildNeg(bld->builder, a, ""); if(b == 2 && bld->type.floating) return lp_build_add(bld, a, a); @@ -714,7 +748,12 @@ LLVMValueRef lp_build_negate(struct lp_build_context *bld, LLVMValueRef a) { - return LLVMBuildNeg(bld->builder, a, ""); + if (bld->type.floating) + a = LLVMBuildFNeg(bld->builder, a, ""); + else + a = LLVMBuildNeg(bld->builder, a, ""); + + return a; } @@ -1033,7 +1072,7 @@ 
lp_build_iround(struct lp_build_context *bld, half = LLVMBuildOr(bld->builder, sign, half, ""); half = LLVMBuildBitCast(bld->builder, half, vec_type, ""); - res = LLVMBuildAdd(bld->builder, a, half, ""); + res = LLVMBuildFAdd(bld->builder, a, half, ""); } res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); @@ -1082,7 +1121,7 @@ lp_build_ifloor(struct lp_build_context *bld, offset = LLVMBuildAnd(bld->builder, offset, sign, ""); offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset"); - res = LLVMBuildAdd(bld->builder, a, offset, "ifloor.res"); + res = LLVMBuildFAdd(bld->builder, a, offset, "ifloor.res"); } /* round to nearest (toward zero) */ @@ -1132,7 +1171,7 @@ lp_build_iceil(struct lp_build_context *bld, offset = LLVMBuildAnd(bld->builder, offset, sign, ""); offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset"); - res = LLVMBuildAdd(bld->builder, a, offset, "iceil.res"); + res = LLVMBuildFAdd(bld->builder, a, offset, "iceil.res"); } /* round to nearest (toward zero) */ @@ -1197,9 +1236,9 @@ lp_build_rcp(struct lp_build_context *bld, rcp_a = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a); - res = LLVMBuildMul(bld->builder, a, rcp_a, ""); - res = LLVMBuildSub(bld->builder, two, res, ""); - res = LLVMBuildMul(bld->builder, res, rcp_a, ""); + res = LLVMBuildFMul(bld->builder, a, rcp_a, ""); + res = LLVMBuildFSub(bld->builder, two, res, ""); + res = LLVMBuildFMul(bld->builder, res, rcp_a, ""); return rcp_a; #else @@ -1282,7 +1321,7 @@ lp_build_sin(struct lp_build_context *bld, */ LLVMValueRef FOPi = lp_build_const_v4sf(1.27323954473516); - LLVMValueRef scale_y = LLVMBuildMul(b, x_abs, FOPi, "scale_y"); + LLVMValueRef scale_y = LLVMBuildFMul(b, x_abs, FOPi, "scale_y"); /* * store the integer part of y in mm0 @@ -1356,9 +1395,9 @@ lp_build_sin(struct lp_build_context *bld, * xmm2 = _mm_mul_ps(y, xmm2); * xmm3 = _mm_mul_ps(y, xmm3); */ - LLVMValueRef xmm1 = LLVMBuildMul(b, y_2, 
DP1, "xmm1"); - LLVMValueRef xmm2 = LLVMBuildMul(b, y_2, DP2, "xmm2"); - LLVMValueRef xmm3 = LLVMBuildMul(b, y_2, DP3, "xmm3"); + LLVMValueRef xmm1 = LLVMBuildFMul(b, y_2, DP1, "xmm1"); + LLVMValueRef xmm2 = LLVMBuildFMul(b, y_2, DP2, "xmm2"); + LLVMValueRef xmm3 = LLVMBuildFMul(b, y_2, DP3, "xmm3"); /* * x = _mm_add_ps(x, xmm1); @@ -1366,16 +1405,16 @@ lp_build_sin(struct lp_build_context *bld, * x = _mm_add_ps(x, xmm3); */ - LLVMValueRef x_1 = LLVMBuildAdd(b, x_abs, xmm1, "x_1"); - LLVMValueRef x_2 = LLVMBuildAdd(b, x_1, xmm2, "x_2"); - LLVMValueRef x_3 = LLVMBuildAdd(b, x_2, xmm3, "x_3"); + LLVMValueRef x_1 = LLVMBuildFAdd(b, x_abs, xmm1, "x_1"); + LLVMValueRef x_2 = LLVMBuildFAdd(b, x_1, xmm2, "x_2"); + LLVMValueRef x_3 = LLVMBuildFAdd(b, x_2, xmm3, "x_3"); /* * Evaluate the first polynom (0 <= x <= Pi/4) * * z = _mm_mul_ps(x,x); */ - LLVMValueRef z = LLVMBuildMul(b, x_3, x_3, "z"); + LLVMValueRef z = LLVMBuildFMul(b, x_3, x_3, "z"); /* * _PS_CONST(coscof_p0, 2.443315711809948E-005); @@ -1390,12 +1429,12 @@ lp_build_sin(struct lp_build_context *bld, * y = *(v4sf*)_ps_coscof_p0; * y = _mm_mul_ps(y, z); */ - LLVMValueRef y_3 = LLVMBuildMul(b, z, coscof_p0, "y_3"); - LLVMValueRef y_4 = LLVMBuildAdd(b, y_3, coscof_p1, "y_4"); - LLVMValueRef y_5 = LLVMBuildMul(b, y_4, z, "y_5"); - LLVMValueRef y_6 = LLVMBuildAdd(b, y_5, coscof_p2, "y_6"); - LLVMValueRef y_7 = LLVMBuildMul(b, y_6, z, "y_7"); - LLVMValueRef y_8 = LLVMBuildMul(b, y_7, z, "y_8"); + LLVMValueRef y_3 = LLVMBuildFMul(b, z, coscof_p0, "y_3"); + LLVMValueRef y_4 = LLVMBuildFAdd(b, y_3, coscof_p1, "y_4"); + LLVMValueRef y_5 = LLVMBuildFMul(b, y_4, z, "y_5"); + LLVMValueRef y_6 = LLVMBuildFAdd(b, y_5, coscof_p2, "y_6"); + LLVMValueRef y_7 = LLVMBuildFMul(b, y_6, z, "y_7"); + LLVMValueRef y_8 = LLVMBuildFMul(b, y_7, z, "y_8"); /* @@ -1404,10 +1443,10 @@ lp_build_sin(struct lp_build_context *bld, * y = _mm_add_ps(y, *(v4sf*)_ps_1); */ LLVMValueRef half = lp_build_const_v4sf(0.5); - LLVMValueRef tmp = 
LLVMBuildMul(b, z, half, "tmp"); - LLVMValueRef y_9 = LLVMBuildSub(b, y_8, tmp, "y_8"); + LLVMValueRef tmp = LLVMBuildFMul(b, z, half, "tmp"); + LLVMValueRef y_9 = LLVMBuildFSub(b, y_8, tmp, "y_8"); LLVMValueRef one = lp_build_const_v4sf(1.0); - LLVMValueRef y_10 = LLVMBuildAdd(b, y_9, one, "y_9"); + LLVMValueRef y_10 = LLVMBuildFAdd(b, y_9, one, "y_9"); /* * _PS_CONST(sincof_p0, -1.9515295891E-4); @@ -1431,13 +1470,13 @@ lp_build_sin(struct lp_build_context *bld, * y2 = _mm_add_ps(y2, x); */ - LLVMValueRef y2_3 = LLVMBuildMul(b, z, sincof_p0, "y2_3"); - LLVMValueRef y2_4 = LLVMBuildAdd(b, y2_3, sincof_p1, "y2_4"); - LLVMValueRef y2_5 = LLVMBuildMul(b, y2_4, z, "y2_5"); - LLVMValueRef y2_6 = LLVMBuildAdd(b, y2_5, sincof_p2, "y2_6"); - LLVMValueRef y2_7 = LLVMBuildMul(b, y2_6, z, "y2_7"); - LLVMValueRef y2_8 = LLVMBuildMul(b, y2_7, x_3, "y2_8"); - LLVMValueRef y2_9 = LLVMBuildAdd(b, y2_8, x_3, "y2_9"); + LLVMValueRef y2_3 = LLVMBuildFMul(b, z, sincof_p0, "y2_3"); + LLVMValueRef y2_4 = LLVMBuildFAdd(b, y2_3, sincof_p1, "y2_4"); + LLVMValueRef y2_5 = LLVMBuildFMul(b, y2_4, z, "y2_5"); + LLVMValueRef y2_6 = LLVMBuildFAdd(b, y2_5, sincof_p2, "y2_6"); + LLVMValueRef y2_7 = LLVMBuildFMul(b, y2_6, z, "y2_7"); + LLVMValueRef y2_8 = LLVMBuildFMul(b, y2_7, x_3, "y2_8"); + LLVMValueRef y2_9 = LLVMBuildFAdd(b, y2_8, x_3, "y2_9"); /* * select the correct result from the two polynoms @@ -1493,7 +1532,7 @@ lp_build_cos(struct lp_build_context *bld, */ LLVMValueRef FOPi = lp_build_const_v4sf(1.27323954473516); - LLVMValueRef scale_y = LLVMBuildMul(b, x_abs, FOPi, "scale_y"); + LLVMValueRef scale_y = LLVMBuildFMul(b, x_abs, FOPi, "scale_y"); /* * store the integer part of y in mm0 @@ -1573,9 +1612,9 @@ lp_build_cos(struct lp_build_context *bld, * xmm2 = _mm_mul_ps(y, xmm2); * xmm3 = _mm_mul_ps(y, xmm3); */ - LLVMValueRef xmm1 = LLVMBuildMul(b, y_2, DP1, "xmm1"); - LLVMValueRef xmm2 = LLVMBuildMul(b, y_2, DP2, "xmm2"); - LLVMValueRef xmm3 = LLVMBuildMul(b, y_2, DP3, "xmm3"); + 
LLVMValueRef xmm1 = LLVMBuildFMul(b, y_2, DP1, "xmm1"); + LLVMValueRef xmm2 = LLVMBuildFMul(b, y_2, DP2, "xmm2"); + LLVMValueRef xmm3 = LLVMBuildFMul(b, y_2, DP3, "xmm3"); /* * x = _mm_add_ps(x, xmm1); @@ -1583,16 +1622,16 @@ lp_build_cos(struct lp_build_context *bld, * x = _mm_add_ps(x, xmm3); */ - LLVMValueRef x_1 = LLVMBuildAdd(b, x_abs, xmm1, "x_1"); - LLVMValueRef x_2 = LLVMBuildAdd(b, x_1, xmm2, "x_2"); - LLVMValueRef x_3 = LLVMBuildAdd(b, x_2, xmm3, "x_3"); + LLVMValueRef x_1 = LLVMBuildFAdd(b, x_abs, xmm1, "x_1"); + LLVMValueRef x_2 = LLVMBuildFAdd(b, x_1, xmm2, "x_2"); + LLVMValueRef x_3 = LLVMBuildFAdd(b, x_2, xmm3, "x_3"); /* * Evaluate the first polynom (0 <= x <= Pi/4) * * z = _mm_mul_ps(x,x); */ - LLVMValueRef z = LLVMBuildMul(b, x_3, x_3, "z"); + LLVMValueRef z = LLVMBuildFMul(b, x_3, x_3, "z"); /* * _PS_CONST(coscof_p0, 2.443315711809948E-005); @@ -1607,12 +1646,12 @@ lp_build_cos(struct lp_build_context *bld, * y = *(v4sf*)_ps_coscof_p0; * y = _mm_mul_ps(y, z); */ - LLVMValueRef y_3 = LLVMBuildMul(b, z, coscof_p0, "y_3"); - LLVMValueRef y_4 = LLVMBuildAdd(b, y_3, coscof_p1, "y_4"); - LLVMValueRef y_5 = LLVMBuildMul(b, y_4, z, "y_5"); - LLVMValueRef y_6 = LLVMBuildAdd(b, y_5, coscof_p2, "y_6"); - LLVMValueRef y_7 = LLVMBuildMul(b, y_6, z, "y_7"); - LLVMValueRef y_8 = LLVMBuildMul(b, y_7, z, "y_8"); + LLVMValueRef y_3 = LLVMBuildFMul(b, z, coscof_p0, "y_3"); + LLVMValueRef y_4 = LLVMBuildFAdd(b, y_3, coscof_p1, "y_4"); + LLVMValueRef y_5 = LLVMBuildFMul(b, y_4, z, "y_5"); + LLVMValueRef y_6 = LLVMBuildFAdd(b, y_5, coscof_p2, "y_6"); + LLVMValueRef y_7 = LLVMBuildFMul(b, y_6, z, "y_7"); + LLVMValueRef y_8 = LLVMBuildFMul(b, y_7, z, "y_8"); /* @@ -1621,10 +1660,10 @@ lp_build_cos(struct lp_build_context *bld, * y = _mm_add_ps(y, *(v4sf*)_ps_1); */ LLVMValueRef half = lp_build_const_v4sf(0.5); - LLVMValueRef tmp = LLVMBuildMul(b, z, half, "tmp"); - LLVMValueRef y_9 = LLVMBuildSub(b, y_8, tmp, "y_8"); + LLVMValueRef tmp = LLVMBuildFMul(b, z, half, 
"tmp"); + LLVMValueRef y_9 = LLVMBuildFSub(b, y_8, tmp, "y_8"); LLVMValueRef one = lp_build_const_v4sf(1.0); - LLVMValueRef y_10 = LLVMBuildAdd(b, y_9, one, "y_9"); + LLVMValueRef y_10 = LLVMBuildFAdd(b, y_9, one, "y_9"); /* * _PS_CONST(sincof_p0, -1.9515295891E-4); @@ -1648,13 +1687,13 @@ lp_build_cos(struct lp_build_context *bld, * y2 = _mm_add_ps(y2, x); */ - LLVMValueRef y2_3 = LLVMBuildMul(b, z, sincof_p0, "y2_3"); - LLVMValueRef y2_4 = LLVMBuildAdd(b, y2_3, sincof_p1, "y2_4"); - LLVMValueRef y2_5 = LLVMBuildMul(b, y2_4, z, "y2_5"); - LLVMValueRef y2_6 = LLVMBuildAdd(b, y2_5, sincof_p2, "y2_6"); - LLVMValueRef y2_7 = LLVMBuildMul(b, y2_6, z, "y2_7"); - LLVMValueRef y2_8 = LLVMBuildMul(b, y2_7, x_3, "y2_8"); - LLVMValueRef y2_9 = LLVMBuildAdd(b, y2_8, x_3, "y2_9"); + LLVMValueRef y2_3 = LLVMBuildFMul(b, z, sincof_p0, "y2_3"); + LLVMValueRef y2_4 = LLVMBuildFAdd(b, y2_3, sincof_p1, "y2_4"); + LLVMValueRef y2_5 = LLVMBuildFMul(b, y2_4, z, "y2_5"); + LLVMValueRef y2_6 = LLVMBuildFAdd(b, y2_5, sincof_p2, "y2_6"); + LLVMValueRef y2_7 = LLVMBuildFMul(b, y2_6, z, "y2_7"); + LLVMValueRef y2_8 = LLVMBuildFMul(b, y2_7, x_3, "y2_8"); + LLVMValueRef y2_9 = LLVMBuildFAdd(b, y2_8, x_3, "y2_9"); /* * select the correct result from the two polynoms @@ -1829,7 +1868,7 @@ lp_build_exp2_approx(struct lp_build_context *bld, ipart = lp_build_floor(bld, x); /* fpart = x - ipart */ - fpart = LLVMBuildSub(bld->builder, x, ipart, ""); + fpart = LLVMBuildFSub(bld->builder, x, ipart, ""); } if(p_exp2_int_part || p_exp2) { @@ -1844,7 +1883,7 @@ lp_build_exp2_approx(struct lp_build_context *bld, expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial, Elements(lp_build_exp2_polynomial)); - res = LLVMBuildMul(bld->builder, expipart, expfpart, ""); + res = LLVMBuildFMul(bld->builder, expipart, expfpart, ""); } if(p_exp2_int_part) @@ -1957,9 +1996,9 @@ lp_build_log2_approx(struct lp_build_context *bld, Elements(lp_build_log2_polynomial)); /* This effectively increases the 
polynomial degree by one, but ensures that log2(1) == 0*/ - logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), ""); + logmant = LLVMBuildFMul(bld->builder, logmant, LLVMBuildFSub(bld->builder, mant, bld->one, ""), ""); - res = LLVMBuildAdd(bld->builder, logmant, logexp, ""); + res = LLVMBuildFAdd(bld->builder, logmant, logexp, ""); } if(p_exp) { diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c index 77012f1fac..8b477313d4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c @@ -117,8 +117,8 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, scale = (double)mask/ubound; bias = (double)((unsigned long long)1 << (mantissa - n)); - res = LLVMBuildMul(builder, src, lp_build_const_vec(src_type, scale), ""); - res = LLVMBuildAdd(builder, res, lp_build_const_vec(src_type, bias), ""); + res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), ""); + res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), ""); res = LLVMBuildBitCast(builder, res, int_vec_type, ""); if(dst_width > n) { @@ -175,6 +175,8 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, double scale; double bias; + assert(dst_type.floating); + mantissa = lp_mantissa(dst_type); n = MIN2(mantissa, src_width); @@ -199,8 +201,8 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, res = LLVMBuildBitCast(builder, res, vec_type, ""); - res = LLVMBuildSub(builder, res, bias_, ""); - res = LLVMBuildMul(builder, res, lp_build_const_vec(dst_type, scale), ""); + res = LLVMBuildFSub(builder, res, bias_, ""); + res = LLVMBuildFMul(builder, res, lp_build_const_vec(dst_type, scale), ""); return res; } @@ -296,7 +298,7 @@ lp_build_conv(LLVMBuilderRef builder, if (dst_scale != 1.0) { LLVMValueRef scale = lp_build_const_vec(tmp_type, dst_scale); for(i = 0; i < num_tmps; ++i) - tmp[i] = LLVMBuildMul(builder, tmp[i], scale, 
""); + tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, ""); } /* Use an equally sized integer for intermediate computations */ @@ -391,7 +393,7 @@ lp_build_conv(LLVMBuilderRef builder, if (src_scale != 1.0) { LLVMValueRef scale = lp_build_const_vec(tmp_type, 1.0/src_scale); for(i = 0; i < num_tmps; ++i) - tmp[i] = LLVMBuildMul(builder, tmp[i], scale, ""); + tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, ""); } } } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index 0f01fc1d75..247cb83ce6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -240,7 +240,7 @@ lp_build_unpack_arith_rgba_aos(LLVMBuilderRef builder, */ if (normalized) - scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), ""); + scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), ""); else scaled = casted; @@ -322,7 +322,7 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, } if (normalized) - scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), ""); + scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), ""); else scaled = unswizzled; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index 9f405921b0..c724a4453e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -197,7 +197,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, if (format_desc->channel[chan].normalized) { double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1); LLVMValueRef scale_val = lp_build_const_vec(type, scale); - input = LLVMBuildMul(builder, input, scale_val, ""); + input = LLVMBuildFMul(builder, input, scale_val, ""); } } else { @@ -227,7 +227,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1); LLVMValueRef 
scale_val = lp_build_const_vec(type, scale); input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), ""); - input = LLVMBuildMul(builder, input, scale_val, ""); + input = LLVMBuildFMul(builder, input, scale_val, ""); } else { /* FIXME */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c b/src/gallium/auxiliary/gallivm/lp_bld_quad.c index ca36046d22..7b1088939b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c @@ -85,7 +85,7 @@ lp_build_scalar_ddx(struct lp_build_context *bld, LLVMValueRef idx_right = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_RIGHT, 0); LLVMValueRef a_left = LLVMBuildExtractElement(bld->builder, a, idx_left, ""); LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, ""); - return LLVMBuildSub(bld->builder, a_right, a_left, ""); + return lp_build_sub(bld, a_right, a_left); } @@ -97,5 +97,5 @@ lp_build_scalar_ddy(struct lp_build_context *bld, LLVMValueRef idx_bottom = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_BOTTOM_LEFT, 0); LLVMValueRef a_top = LLVMBuildExtractElement(bld->builder, a, idx_top, ""); LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, ""); - return LLVMBuildSub(bld->builder, a_bottom, a_top, ""); + return lp_build_sub(bld, a_bottom, a_top); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 1a20d74cac..955d328953 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -888,17 +888,17 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, /* Compute rho = max of all partial derivatives scaled by texture size. 
* XXX this could be vectorized somewhat */ - rho = LLVMBuildMul(bld->builder, + rho = LLVMBuildFMul(bld->builder, lp_build_max(float_bld, dsdx, dsdy), lp_build_int_to_float(float_bld, width), ""); if (dims > 1) { LLVMValueRef max; - max = LLVMBuildMul(bld->builder, + max = LLVMBuildFMul(bld->builder, lp_build_max(float_bld, dtdx, dtdy), lp_build_int_to_float(float_bld, height), ""); rho = lp_build_max(float_bld, rho, max); if (dims > 2) { - max = LLVMBuildMul(bld->builder, + max = LLVMBuildFMul(bld->builder, lp_build_max(float_bld, drdx, drdy), lp_build_int_to_float(float_bld, depth), ""); rho = lp_build_max(float_bld, rho, max); @@ -912,12 +912,12 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, if (lod_bias) { lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias, index0, ""); - lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "shader_lod_bias"); + lod = LLVMBuildFAdd(bld->builder, lod, lod_bias, "shader_lod_bias"); } } /* add sampler lod bias */ - lod = LLVMBuildAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias"); + lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias"); /* clamp lod */ lod = lp_build_clamp(float_bld, lod, min_lod, max_lod); @@ -2029,6 +2029,8 @@ lp_build_sample_soa(LLVMBuilderRef builder, debug_printf("Sample from %s\n", util_format_name(fmt)); } + assert(type.floating); + /* Setup our build context */ memset(&bld, 0, sizeof bld); bld.builder = builder; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 42d796cb95..becbd3bece 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -506,6 +506,7 @@ emit_fetch( const unsigned chan_index ) { const struct tgsi_full_src_register *reg = &inst->Src[src_op]; + const struct lp_type type = bld->base.type; const unsigned swizzle = tgsi_util_get_full_src_register_swizzle(reg, chan_index); LLVMValueRef res; @@ -612,11 +613,12 @@ 
emit_fetch( case TGSI_UTIL_SIGN_SET: /* TODO: Use bitwese OR for floating point */ res = lp_build_abs( &bld->base, res ); - res = LLVMBuildNeg( bld->base.builder, res, "" ); - break; - + /* fall through */ case TGSI_UTIL_SIGN_TOGGLE: - res = LLVMBuildNeg( bld->base.builder, res, "" ); + if (type.floating) + res = LLVMBuildFNeg( bld->base.builder, res, "" ); + else + res = LLVMBuildNeg( bld->base.builder, res, "" ); break; case TGSI_UTIL_SIGN_KEEP: -- cgit v1.2.3 From 8a3a971743a90463e65b44f1769a5301a31ce4cd Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 9 Aug 2010 17:26:18 +0100 Subject: gallivm: Don't call LLVMBuildFNeg on llvm-2.6. It didn't exist yet. --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 7 +++---- src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 6 +----- 2 files changed, 4 insertions(+), 9 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index d2dde41e9f..cecc1858bc 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -512,10 +512,7 @@ lp_build_mul_imm(struct lp_build_context *bld, return a; if(b == -1) - if (bld->type.floating) - return LLVMBuildFNeg(bld->builder, a, ""); - else - return LLVMBuildNeg(bld->builder, a, ""); + return lp_build_negate(bld, a); if(b == 2 && bld->type.floating) return lp_build_add(bld, a, a); @@ -748,9 +745,11 @@ LLVMValueRef lp_build_negate(struct lp_build_context *bld, LLVMValueRef a) { +#if HAVE_LLVM >= 0x0207 if (bld->type.floating) a = LLVMBuildFNeg(bld->builder, a, ""); else +#endif a = LLVMBuildNeg(bld->builder, a, ""); return a; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index becbd3bece..0aa64affac 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -506,7 +506,6 @@ emit_fetch( const unsigned chan_index ) { const struct 
tgsi_full_src_register *reg = &inst->Src[src_op]; - const struct lp_type type = bld->base.type; const unsigned swizzle = tgsi_util_get_full_src_register_swizzle(reg, chan_index); LLVMValueRef res; @@ -615,10 +614,7 @@ emit_fetch( res = lp_build_abs( &bld->base, res ); /* fall through */ case TGSI_UTIL_SIGN_TOGGLE: - if (type.floating) - res = LLVMBuildFNeg( bld->base.builder, res, "" ); - else - res = LLVMBuildNeg( bld->base.builder, res, "" ); + res = lp_build_negate( &bld->base, res ); break; case TGSI_UTIL_SIGN_KEEP: -- cgit v1.2.3 From 6e1f9bc8f62baf3854a53bf67bb025790f2cb317 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 9 Aug 2010 17:30:33 +0100 Subject: gallivm: More type checks. --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 43 +++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index cecc1858bc..ec9b53be80 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -160,6 +160,8 @@ lp_build_comp(struct lp_build_context *bld, { const struct lp_type type = bld->type; + assert(lp_check_value(type, a)); + if(a == bld->one) return bld->zero; if(a == bld->zero) @@ -255,6 +257,8 @@ lp_build_sum_vector(struct lp_build_context *bld, LLVMValueRef index, res; unsigned i; + assert(lp_check_value(type, a)); + if (a == bld->zero) return bld->zero; if (a == bld->undef) @@ -505,6 +509,8 @@ lp_build_mul_imm(struct lp_build_context *bld, { LLVMValueRef factor; + assert(lp_check_value(bld->type, a)); + if(b == 0) return bld->zero; @@ -598,6 +604,10 @@ lp_build_lerp(struct lp_build_context *bld, LLVMValueRef delta; LLVMValueRef res; + assert(lp_check_value(bld->type, x)); + assert(lp_check_value(bld->type, v0)); + assert(lp_check_value(bld->type, v1)); + delta = lp_build_sub(bld, v1, v0); res = lp_build_mul(bld, x, delta); @@ -639,6 +649,9 @@ lp_build_min(struct 
lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { + assert(lp_check_value(bld->type, a)); + assert(lp_check_value(bld->type, b)); + if(a == bld->undef || b == bld->undef) return bld->undef; @@ -667,6 +680,9 @@ lp_build_max(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { + assert(lp_check_value(bld->type, a)); + assert(lp_check_value(bld->type, b)); + if(a == bld->undef || b == bld->undef) return bld->undef; @@ -696,6 +712,10 @@ lp_build_clamp(struct lp_build_context *bld, LLVMValueRef min, LLVMValueRef max) { + assert(lp_check_value(bld->type, a)); + assert(lp_check_value(bld->type, min)); + assert(lp_check_value(bld->type, max)); + a = lp_build_min(bld, a, max); a = lp_build_max(bld, a, min); return a; @@ -712,6 +732,8 @@ lp_build_abs(struct lp_build_context *bld, const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); + assert(lp_check_value(type, a)); + if(!type.sign) return a; @@ -745,6 +767,8 @@ LLVMValueRef lp_build_negate(struct lp_build_context *bld, LLVMValueRef a) { + assert(lp_check_value(bld->type, a)); + #if HAVE_LLVM >= 0x0207 if (bld->type.floating) a = LLVMBuildFNeg(bld->builder, a, ""); @@ -765,6 +789,8 @@ lp_build_sgn(struct lp_build_context *bld, LLVMValueRef cond; LLVMValueRef res; + assert(lp_check_value(type, a)); + /* Handle non-zero case */ if(!type.sign) { /* if not zero then sign must be positive */ @@ -822,6 +848,7 @@ lp_build_set_sign(struct lp_build_context *bld, ~((unsigned long long) 1 << (type.width - 1))); LLVMValueRef val, res; + assert(lp_check_value(type, a)); assert(type.floating); /* val = reinterpret_cast(a) */ @@ -1188,6 +1215,8 @@ lp_build_sqrt(struct lp_build_context *bld, LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; + assert(lp_check_value(type, a)); + /* TODO: optimize the constant case */ /* TODO: optimize the constant case */ @@ -1204,6 +1233,8 @@ lp_build_rcp(struct lp_build_context *bld, { const struct lp_type type = bld->type; + 
assert(lp_check_value(type, a)); + if(a == bld->zero) return bld->undef; if(a == bld->one) @@ -1258,6 +1289,8 @@ lp_build_rsqrt(struct lp_build_context *bld, { const struct lp_type type = bld->type; + assert(lp_check_value(type, a)); + assert(type.floating); if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) @@ -1745,6 +1778,8 @@ lp_build_exp(struct lp_build_context *bld, /* log2(e) = 1/log(2) */ LLVMValueRef log2e = lp_build_const_vec(bld->type, 1.4426950408889634); + assert(lp_check_value(bld->type, x)); + return lp_build_mul(bld, log2e, lp_build_exp2(bld, x)); } @@ -1759,6 +1794,8 @@ lp_build_log(struct lp_build_context *bld, /* log(2) */ LLVMValueRef log2 = lp_build_const_vec(bld->type, 0.69314718055994529); + assert(lp_check_value(bld->type, x)); + return lp_build_mul(bld, log2, lp_build_exp2(bld, x)); } @@ -1781,6 +1818,8 @@ lp_build_polynomial(struct lp_build_context *bld, LLVMValueRef res = NULL; unsigned i; + assert(lp_check_value(bld->type, x)); + /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) debug_printf("%s: inefficient/imprecise constant arithmetic\n", @@ -1852,6 +1891,8 @@ lp_build_exp2_approx(struct lp_build_context *bld, LLVMValueRef expfpart = NULL; LLVMValueRef res = NULL; + assert(lp_check_value(bld->type, x)); + if(p_exp2_int_part || p_frac_part || p_exp2) { /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) @@ -1965,6 +2006,8 @@ lp_build_log2_approx(struct lp_build_context *bld, LLVMValueRef logmant = NULL; LLVMValueRef res = NULL; + assert(lp_check_value(bld->type, x)); + if(p_exp || p_floor_log2 || p_log2) { /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) -- cgit v1.2.3 From e9f3994b16beabb5892abdc4b359a093cbde7f79 Mon Sep 17 00:00:00 2001 From: nobled Date: Fri, 6 Aug 2010 17:32:29 +0000 Subject: llvmpipe: Always use floating-point operators for floating-point types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See: 
http://bugs.freedesktop.org/29404 http://bugs.freedesktop.org/29407 Signed-off-by: José Fonseca --- src/gallium/drivers/llvmpipe/lp_bld_interp.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index 78744da500..2cf6f38c4b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -141,7 +141,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld, else { dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), ""); dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), ""); - dadxy = LLVMBuildAdd(builder, dadx, dady, ""); + dadxy = LLVMBuildFAdd(builder, dadx, dady, ""); attrib_name(dadx, attrib, chan, ".dadx"); attrib_name(dady, attrib, chan, ".dady"); attrib_name(dadxy, attrib, chan, ".dadxy"); @@ -177,7 +177,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld, * dadq2 = 2 * dq */ - dadq2 = LLVMBuildAdd(builder, dadq, dadq, ""); + dadq2 = LLVMBuildFAdd(builder, dadq, dadq, ""); /* * a = a0 + x * dadx + y * dady @@ -193,12 +193,11 @@ coeffs_init(struct lp_build_interp_soa_context *bld, a = a0; if (interp != LP_INTERP_CONSTANT && interp != LP_INTERP_FACING) { - a = LLVMBuildAdd(builder, a, - LLVMBuildMul(builder, bld->x, dadx, ""), - ""); - a = LLVMBuildAdd(builder, a, - LLVMBuildMul(builder, bld->y, dady, ""), - ""); + LLVMValueRef tmp; + tmp = LLVMBuildFMul(builder, bld->x, dadx, ""); + a = LLVMBuildFAdd(builder, a, tmp, ""); + tmp = LLVMBuildFMul(builder, bld->y, dady, ""); + a = LLVMBuildFAdd(builder, a, tmp, ""); } } @@ -212,7 +211,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld, * Compute the attrib values on the upper-left corner of each quad. 
*/ - a = LLVMBuildAdd(builder, a, dadq2, ""); + a = LLVMBuildFAdd(builder, a, dadq2, ""); /* * a *= 1 / w -- cgit v1.2.3 From a44a6960fab8c0053678fe74ce4c978ef40b06ff Mon Sep 17 00:00:00 2001 From: nobled Date: Mon, 9 Aug 2010 21:15:08 +0000 Subject: gallivm: Even more type checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit http://bugs.freedesktop.org/29407 Signed-off-by: José Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 12 +++++++++++- src/gallium/auxiliary/gallivm/lp_bld_logic.c | 15 +++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index ec9b53be80..860fbd829b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -72,6 +72,9 @@ lp_build_min_simple(struct lp_build_context *bld, const char *intrinsic = NULL; LLVMValueRef cond; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + /* TODO: optimize the constant case */ if(type.width * type.length == 128) { @@ -118,6 +121,9 @@ lp_build_max_simple(struct lp_build_context *bld, const char *intrinsic = NULL; LLVMValueRef cond; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + /* TODO: optimize the constant case */ if(type.width * type.length == 128) { @@ -395,6 +401,10 @@ lp_build_mul_u8n(LLVMBuilderRef builder, LLVMValueRef c8; LLVMValueRef ab; + assert(!i16_type.floating); + assert(lp_check_value(i16_type, a)); + assert(lp_check_value(i16_type, b)); + c8 = lp_build_const_int_vec(i16_type, 8); #if 0 @@ -848,8 +858,8 @@ lp_build_set_sign(struct lp_build_context *bld, ~((unsigned long long) 1 << (type.width - 1))); LLVMValueRef val, res; - assert(lp_check_value(type, a)); assert(type.floating); + assert(lp_check_value(type, a)); /* val = reinterpret_cast(a) */ val = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); diff 
--git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index ab4ddb81c4..96f8e21fc6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -83,6 +83,8 @@ lp_build_compare(LLVMBuilderRef builder, assert(func >= PIPE_FUNC_NEVER); assert(func <= PIPE_FUNC_ALWAYS); + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); if(func == PIPE_FUNC_NEVER) return zeros; @@ -374,6 +376,9 @@ lp_build_select_bitwise(struct lp_build_context *bld, struct lp_type type = bld->type; LLVMValueRef res; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + if (a == b) { return a; } @@ -419,6 +424,9 @@ lp_build_select(struct lp_build_context *bld, struct lp_type type = bld->type; LLVMValueRef res; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + if(a == b) return a; @@ -484,6 +492,9 @@ lp_build_select_aos(struct lp_build_context *bld, const unsigned n = type.length; unsigned i, j; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + if(a == b) return a; if(cond[0] && cond[1] && cond[2] && cond[3]) @@ -539,7 +550,11 @@ lp_build_select_aos(struct lp_build_context *bld, LLVMValueRef lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { + assert(lp_check_value(bld->type, a)); + assert(lp_check_value(bld->type, b)); + b = LLVMBuildNot(bld->builder, b, ""); b = LLVMBuildAnd(bld->builder, a, b, ""); + return b; } -- cgit v1.2.3 From 20b3e40f166c77bd7fa5b7171e5b4169ed035280 Mon Sep 17 00:00:00 2001 From: nobled Date: Mon, 9 Aug 2010 21:25:18 +0000 Subject: gallivm: Fix bitwise operations for floats, division for integers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit http://bugs.freedesktop.org/29407 Signed-off-by: José Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 17 ++++++++++++++--- src/gallium/auxiliary/gallivm/lp_bld_logic.c | 15 
+++++++++++++-- 2 files changed, 27 insertions(+), 5 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 860fbd829b..cf2feeb163 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -588,13 +588,24 @@ lp_build_div(struct lp_build_context *bld, if(a == bld->undef || b == bld->undef) return bld->undef; - if(LLVMIsConstant(a) && LLVMIsConstant(b)) - return LLVMConstFDiv(a, b); + if(LLVMIsConstant(a) && LLVMIsConstant(b)) { + if (type.floating) + return LLVMConstFDiv(a, b); + else if (type.sign) + return LLVMConstSDiv(a, b); + else + return LLVMConstUDiv(a, b); + } if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) return lp_build_mul(bld, a, lp_build_rcp(bld, b)); - return LLVMBuildFDiv(bld->builder, a, b, ""); + if (type.floating) + return LLVMBuildFDiv(bld->builder, a, b, ""); + else if (type.sign) + return LLVMBuildSDiv(bld->builder, a, b, ""); + else + return LLVMBuildUDiv(bld->builder, a, b, ""); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index 96f8e21fc6..7d7db3b0d9 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -550,11 +550,22 @@ lp_build_select_aos(struct lp_build_context *bld, LLVMValueRef lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - assert(lp_check_value(bld->type, a)); - assert(lp_check_value(bld->type, b)); + const struct lp_type type = bld->type; + + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + + /* can't do bitwise ops on floating-point values */ + if(type.floating) { + a = LLVMBuildBitCast(bld->builder, a, bld->int_vec_type, ""); + b = LLVMBuildBitCast(bld->builder, b, bld->int_vec_type, ""); + } b = LLVMBuildNot(bld->builder, b, ""); b = LLVMBuildAnd(bld->builder, a, b, ""); + if(type.floating) { + b = 
LLVMBuildBitCast(bld->builder, b, bld->vec_type, ""); + } return b; } -- cgit v1.2.3 From f263fdee8146719b14d9f9b14cf0c224461f35dc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 10 Aug 2010 08:56:20 -0600 Subject: gallivm: fix non-SSE4.1 case in lp_build_pack2() Since there's no SSE instruction for this case, fall through to the generic shuffle code. Fixes bug fd.o 29468. --- src/gallium/auxiliary/gallivm/lp_bld_pack.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c index 7748f8f099..ecfb13a0d4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c @@ -261,13 +261,14 @@ lp_build_pack2(LLVMBuilderRef builder, #endif LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type); LLVMValueRef shuffle; - LLVMValueRef res; + LLVMValueRef res = NULL; assert(!src_type.floating); assert(!dst_type.floating); assert(src_type.width == dst_type.width * 2); assert(src_type.length * 2 == dst_type.length); + /* Check for special cases first */ if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) { switch(src_type.width) { case 32: @@ -283,8 +284,8 @@ lp_build_pack2(LLVMBuilderRef builder, return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi); } else { - assert(0); - return LLVMGetUndef(dst_vec_type); + /* use generic shuffle below */ + res = NULL; } } break; @@ -310,10 +311,13 @@ lp_build_pack2(LLVMBuilderRef builder, break; } - res = LLVMBuildBitCast(builder, res, dst_vec_type, ""); - return res; + if (res) { + res = LLVMBuildBitCast(builder, res, dst_vec_type, ""); + return res; + } } + /* generic shuffle */ lo = LLVMBuildBitCast(builder, lo, dst_vec_type, ""); hi = LLVMBuildBitCast(builder, hi, dst_vec_type, ""); -- cgit v1.2.3 From 247b253d326819e9c62801d741b3740a27aeaf8a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 
10 Aug 2010 16:32:29 +0100 Subject: util: Add util_format_srgb(). To convert RGB -> SRGB format. --- src/gallium/auxiliary/util/u_format.h | 38 +++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index 38254b1096..8e786a390a 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -630,6 +630,44 @@ util_format_has_alpha(enum pipe_format format) } } +/** + * Return the matching SRGB format, or PIPE_FORMAT_NONE if none. + */ +static INLINE enum pipe_format +util_format_srgb(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_L8_UNORM: + return PIPE_FORMAT_L8_SRGB; + case PIPE_FORMAT_L8A8_UNORM: + return PIPE_FORMAT_L8A8_SRGB; + case PIPE_FORMAT_R8G8B8_UNORM: + return PIPE_FORMAT_R8G8B8_SRGB; + case PIPE_FORMAT_A8B8G8R8_UNORM: + return PIPE_FORMAT_A8B8G8R8_SRGB; + case PIPE_FORMAT_X8B8G8R8_UNORM: + return PIPE_FORMAT_X8B8G8R8_SRGB; + case PIPE_FORMAT_B8G8R8A8_UNORM: + return PIPE_FORMAT_B8G8R8A8_SRGB; + case PIPE_FORMAT_B8G8R8X8_UNORM: + return PIPE_FORMAT_B8G8R8X8_SRGB; + case PIPE_FORMAT_A8R8G8B8_UNORM: + return PIPE_FORMAT_A8R8G8B8_SRGB; + case PIPE_FORMAT_X8R8G8B8_UNORM: + return PIPE_FORMAT_X8R8G8B8_SRGB; + case PIPE_FORMAT_DXT1_RGB: + return PIPE_FORMAT_DXT1_SRGB; + case PIPE_FORMAT_DXT1_RGBA: + return PIPE_FORMAT_DXT1_SRGBA; + case PIPE_FORMAT_DXT3_RGBA: + return PIPE_FORMAT_DXT3_SRGBA; + case PIPE_FORMAT_DXT5_RGBA: + return PIPE_FORMAT_DXT5_SRGBA; + default: + return PIPE_FORMAT_NONE; + } +} + /** * Return the number of components stored. * Formats with block size != 1x1 will always have 1 component (the block). 
-- cgit v1.2.3 From 72f8edfc0bb8613ac7c0decfd4199e83c8d8a737 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Tue, 10 Aug 2010 11:52:00 -0400 Subject: r600g: avoid reemiting literal, avoid scheduling empty cs Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_asm.c | 3 ++- src/gallium/drivers/r600/r600_asm.h | 1 + src/gallium/drivers/r600/r600_context.c | 5 ++++- src/gallium/drivers/r600/radeon.h | 31 +++++++++++++++++++++++++++++++ src/gallium/winsys/r600/drm/radeon_ctx.c | 2 ++ src/gallium/winsys/r600/drm/radeon_priv.h | 30 ------------------------------ 6 files changed, 40 insertions(+), 32 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 16c98504ad..ae818bf19b 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -179,12 +179,13 @@ int r600_bc_add_literal(struct r600_bc *bc, const u32 *value) return -EINVAL; } alu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list); - if (!alu->last || !alu->nliteral) { + if (!alu->last || !alu->nliteral || alu->literal_added) { return 0; } memcpy(alu->value, value, 4 * 4); bc->cf_last->ndw += alu->nliteral; bc->ndw += alu->nliteral; + alu->literal_added = 1; return 0; } diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 3fd94dbda0..10d98afaf0 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -48,6 +48,7 @@ struct r600_bc_alu { unsigned last; unsigned is_op3; unsigned nliteral; + unsigned literal_added; u32 value[4]; }; diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index 052eb1cd6d..edde80c660 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -54,15 +54,18 @@ void r600_flush(struct pipe_context *ctx, unsigned flags, /* FIXME dumping should be removed once shader support instructions * without 
throwing bad code */ + if (!rctx->ctx->cpm4) + goto out; sprintf(dname, "gallium-%08d.bof", dc); if (dc < 1) radeon_ctx_dump_bof(rctx->ctx, dname); #if 1 radeon_ctx_submit(rctx->ctx); #endif + dc++; +out: rctx->ctx = radeon_ctx_decref(rctx->ctx); rctx->ctx = radeon_ctx(rscreen->rw); - dc++; } static void r600_init_config(struct r600_context *rctx) diff --git a/src/gallium/drivers/r600/radeon.h b/src/gallium/drivers/r600/radeon.h index 00cff41b4f..8f00a4895a 100644 --- a/src/gallium/drivers/r600/radeon.h +++ b/src/gallium/drivers/r600/radeon.h @@ -156,6 +156,37 @@ int radeon_ctx_pm4(struct radeon_ctx *ctx); int radeon_ctx_submit(struct radeon_ctx *ctx); void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file); +/* + * radeon context functions + */ +#pragma pack(1) +struct radeon_cs_reloc { + uint32_t handle; + uint32_t read_domain; + uint32_t write_domain; + uint32_t flags; +}; +#pragma pack() + +struct radeon_ctx { + int refcount; + struct radeon *radeon; + u32 *pm4; + u32 cpm4; + u32 draw_cpm4; + unsigned id; + unsigned next_id; + unsigned nreloc; + struct radeon_cs_reloc *reloc; + unsigned nbo; + struct radeon_bo **bo; + unsigned ndraw; + struct radeon_draw *cdraw; + struct radeon_draw **draw; + unsigned nstate; + struct radeon_state **state; +}; + /* * R600/R700 */ diff --git a/src/gallium/winsys/r600/drm/radeon_ctx.c b/src/gallium/winsys/r600/drm/radeon_ctx.c index 6b0eba0b28..ff70ce6de7 100644 --- a/src/gallium/winsys/r600/drm/radeon_ctx.c +++ b/src/gallium/winsys/r600/drm/radeon_ctx.c @@ -151,6 +151,8 @@ int radeon_ctx_submit(struct radeon_ctx *ctx) uint64_t chunk_array[2]; int r = 0; + if (!ctx->cpm4) + return 0; #if 0 for (r = 0; r < ctx->cpm4; r++) { fprintf(stderr, "0x%08X\n", ctx->pm4[r]); diff --git a/src/gallium/winsys/r600/drm/radeon_priv.h b/src/gallium/winsys/r600/drm/radeon_priv.h index b91421f438..96c0d060f7 100644 --- a/src/gallium/winsys/r600/drm/radeon_priv.h +++ b/src/gallium/winsys/r600/drm/radeon_priv.h @@ -68,36 +68,6 @@ extern 
int radeon_is_family_compatible(unsigned family1, unsigned family2); extern int radeon_reg_id(struct radeon *radeon, unsigned offset, unsigned *typeid, unsigned *stateid, unsigned *id); extern unsigned radeon_type_from_id(struct radeon *radeon, unsigned id); -/* - * radeon context functions - */ -#pragma pack(1) -struct radeon_cs_reloc { - uint32_t handle; - uint32_t read_domain; - uint32_t write_domain; - uint32_t flags; -}; -#pragma pack() - -struct radeon_ctx { - int refcount; - struct radeon *radeon; - u32 *pm4; - u32 cpm4; - u32 draw_cpm4; - unsigned id; - unsigned next_id; - unsigned nreloc; - struct radeon_cs_reloc *reloc; - unsigned nbo; - struct radeon_bo **bo; - unsigned ndraw; - struct radeon_draw *cdraw; - struct radeon_draw **draw; - unsigned nstate; - struct radeon_state **state; -}; int radeon_ctx_set_bo_new(struct radeon_ctx *ctx, struct radeon_bo *bo); struct radeon_bo *radeon_ctx_get_bo(struct radeon_ctx *ctx, unsigned reloc); -- cgit v1.2.3 From c298bab60ea63882f34825a35cbc60f662783e64 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 5 Aug 2010 10:19:00 -0700 Subject: r300/compiler: Implement hardware assisted loops for vertex shaders. Single loops work, but nested loops do not. 
--- src/gallium/drivers/r300/r300_emit.c | 16 +++ src/gallium/drivers/r300/r300_reg.h | 21 ++++ src/gallium/drivers/r300/r300_state.c | 4 +- src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 2 +- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 115 ++++++++++++++++++--- .../drivers/dri/r300/compiler/r3xx_vertprog_dump.c | 21 +++- src/mesa/drivers/dri/r300/compiler/radeon_code.h | 19 +++- .../drivers/dri/r300/compiler/radeon_compiler.h | 1 + .../dri/r300/compiler/radeon_emulate_loops.c | 8 +- .../dri/r300/compiler/radeon_emulate_loops.h | 3 +- src/mesa/drivers/dri/r300/r300_reg.h | 21 ++++ 11 files changed, 210 insertions(+), 21 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 7bd43b6eb5..98958d1a2e 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -936,6 +936,22 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_TABLE(data, 4); } } + + /* Emit flow control instructions. */ + if (code->num_fc_ops) { + + OUT_CS_REG(R300_VAP_PVS_FLOW_CNTL_OPC, code->fc_ops); + if (r300screen->caps.is_r500) { + OUT_CS_REG_SEQ(R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0, code->num_fc_ops * 2); + OUT_CS_TABLE(code->fc_op_addrs.r500, code->num_fc_ops * 2); + } else { + OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_ADDRS_0, code->num_fc_ops); + OUT_CS_TABLE(code->fc_op_addrs.r300, code->num_fc_ops); + } + OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0, code->num_fc_ops); + OUT_CS_TABLE(code->fc_loop_index, code->num_fc_ops); + } + END_CS; } diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 99a9d65055..60d3b600cb 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -496,6 +496,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228 #define R300_VAP_GB_HORZ_DISC_ADJ 0x222c +#define R300_VAP_PVS_FLOW_CNTL_ADDRS_0 0x2230 +#define R300_PVS_FC_ACT_ADRS(x) ((x) << 0) +#define R300_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 8) +#define R300_PVS_FC_LAST_INST(x) ((x) << 16) +#define R300_PVS_FC_RTN_INST(x) ((x) << 24) + /* gap */ /* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between @@ -514,6 +520,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_2288_R300 0x00750000 /* -- nh */ # define R300_2288_RV350 0x0000FFFF /* -- Vladimir */ +#define R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0 0x2290 +#define R300_PVS_FC_LOOP_INIT_VAL(x) ((x) << 0) +#define R300_PVS_FC_LOOP_STEP_VAL(x) ((x) << 8) + /* gap */ /* Addresses are relative to the vertex program instruction area of the @@ -548,6 +558,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_VAP_PVS_CODE_CNTL_1 0x22D8 # define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0 #define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC +#define R300_VAP_PVS_FC_OPC_JUMP(x) (1 << (2 * (x))) +#define R300_VAP_PVS_FC_OPC_LOOP(x) (2 << (2 * (x))) +#define R300_VAP_PVS_FC_OPC_JSR(x) (3 << (2 * (x))) /* The entire range from 0x2300 to 0x2AC inclusive seems to be used for * immediate vertices @@ -564,6 +577,14 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* write 0 to indicate end of packet? 
*/ #define R300_VAP_VTX_END_OF_PKT 0x24AC +#define R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0 0x2500 +#define R500_PVS_FC_ACT_ADRS(x) ((x) << 0) +#define R500_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 16) + +#define R500_VAP_PVS_FLOW_CNTL_ADDRS_UW_0 0x2504 +#define R500_PVS_FC_LAST_INST(x) ((x) << 0) +#define R500_PVS_FC_RTN_INST(x) ((x) << 16) + /* gap */ /* These are values from r300_reg/r300_reg.h - they are known to be correct diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 9db5e9e054..e62a33daeb 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1758,10 +1758,12 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */ if (r300->screen->caps.has_tcl) { + unsigned fc_op_dwords = r300->screen->caps.is_r500 ? 3 : 2; r300->vs_state.dirty = TRUE; r300->vs_state.size = vs->code.length + 9 + - (vs->immediates_count ? vs->immediates_count * 4 + 3 : 0); + (vs->immediates_count ? vs->immediates_count * 4 + 3 : 0) + + (vs->code.num_fc_ops ? 
vs->code.num_fc_ops * fc_op_dwords + 4 : 0); if (vs->externals_count) { r300->vs_constants.dirty = TRUE; diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index c6246a81a2..d2fa816894 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -113,7 +113,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) debug_program_log(c, "after unroll loops"); } else{ - rc_transform_loops(&c->Base, &loop_state); + rc_transform_loops(&c->Base, &loop_state, -1); debug_program_log(c, "after transform loops"); rc_emulate_branches(&c->Base); diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index e940fedec2..7c2ba2fc09 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -32,6 +32,11 @@ #include "radeon_emulate_branches.h" #include "radeon_emulate_loops.h" +struct loop { + int BgnLoop; + +}; + /* * Take an already-setup and valid source then swizzle it appropriately to * obtain a constant ZERO or ONE source. 
@@ -337,6 +342,10 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi { struct rc_instruction *rci; + struct loop * loops; + int current_loop_depth = 0; + int loops_reserved = 0; + compiler->code->pos_end = 0; /* Not supported yet */ compiler->code->length = 0; @@ -385,6 +394,68 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break; case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break; + case RC_OPCODE_BGNLOOP: + { + struct loop * l; + + if ((!compiler->Base.is_r500 + && loops_reserved >= R300_VS_MAX_LOOP_DEPTH) + || loops_reserved >= R500_VS_MAX_FC_DEPTH) { + rc_error(&compiler->Base, + "Loops are nested too deep."); + return; + } + memory_pool_array_reserve(&compiler->Base.Pool, + struct loop, loops, current_loop_depth, + loops_reserved, 1); + l = &loops[current_loop_depth++]; + memset(l , 0, sizeof(struct loop)); + l->BgnLoop = (compiler->code->length / 4); + continue; + } + case RC_OPCODE_ENDLOOP: + { + struct loop * l = &loops[current_loop_depth - 1]; + unsigned int act_addr = l->BgnLoop - 1; + unsigned int last_addr = (compiler->code->length / 4) - 1; + unsigned int ret_addr = l->BgnLoop; + + if (loops_reserved >= R300_VS_MAX_FC_OPS) { + rc_error(&compiler->Base, + "Too many flow control instructions."); + return; + } + if (compiler->Base.is_r500) { + compiler->code->fc_op_addrs.r500 + [compiler->code->num_fc_ops].lw = + R500_PVS_FC_ACT_ADRS(act_addr) + | R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff) + ; + compiler->code->fc_op_addrs.r500 + [compiler->code->num_fc_ops].uw = + R500_PVS_FC_LAST_INST(last_addr) + | R500_PVS_FC_RTN_INST(ret_addr) + ; + } else { + compiler->code->fc_op_addrs.r300 + [compiler->code->num_fc_ops] = + R300_PVS_FC_ACT_ADRS(act_addr) + | R300_PVS_FC_LOOP_CNT_JMP_INST(0xff) + | 
R300_PVS_FC_LAST_INST(last_addr) + | R300_PVS_FC_RTN_INST(ret_addr) + ; + } + compiler->code->fc_loop_index[compiler->code->num_fc_ops] = + R300_PVS_FC_LOOP_INIT_VAL(0x0) + | R300_PVS_FC_LOOP_STEP_VAL(0x1) + ; + compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP( + compiler->code->num_fc_ops); + compiler->code->num_fc_ops++; + current_loop_depth--; + continue; + } + default: rc_error(&compiler->Base, "Unknown opcode %s\n", rc_get_opcode_info(vpi->Opcode)->Name); return; @@ -406,6 +477,7 @@ struct temporary_allocation { static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler) { struct rc_instruction *inst; + struct rc_instruction *end_loop = NULL; unsigned int num_orig_temps = 0; char hwtemps[R300_VS_MAX_TEMPS]; struct temporary_allocation * ta; @@ -440,10 +512,35 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c /* Pass 2: Determine original temporary lifetimes */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + /* Instructions inside of loops need to use the ENDLOOP + * instruction as their LastRead. */ + if (!end_loop && inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { + int endloops = 1; + struct rc_instruction * ptr; + for(ptr = inst->Next; + ptr != &compiler->Base.Program.Instructions; + ptr = ptr->Next){ + if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) { + endloops++; + } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) { + endloops--; + if (endloops <= 0) { + end_loop = ptr; + break; + } + } + } + } + + if (inst == end_loop) { + end_loop = NULL; + continue; + } for (i = 0; i < opcode->NumSrcRegs; ++i) { if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) - ta[inst->U.I.SrcReg[i].Index].LastRead = inst; + ta[inst->U.I.SrcReg[i].Index].LastRead = + end_loop ? 
end_loop : inst; } } @@ -640,17 +737,11 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) debug_program_log(compiler, "before compilation"); - /* XXX Ideally this should be done only for r3xx, but since - * we don't have branching support for r5xx, we use the emulation - * on all chipsets. */ + if (compiler->Base.is_r500) + rc_transform_loops(&compiler->Base, &loop_state, R500_VS_MAX_ALU); + else + rc_transform_loops(&compiler->Base, &loop_state, R300_VS_MAX_ALU); - if (compiler->Base.is_r500){ - rc_transform_loops(&compiler->Base, &loop_state); - rc_emulate_loops(&loop_state, R500_VS_MAX_ALU); - } else { - rc_transform_loops(&compiler->Base, &loop_state); - rc_emulate_loops(&loop_state, R300_VS_MAX_ALU); - } debug_program_log(compiler, "after emulate loops"); rc_emulate_branches(&compiler->Base); @@ -717,6 +808,6 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) if (compiler->Base.Debug) { fprintf(stderr, "Final vertex program code:\n"); - r300_vertex_program_dump(compiler->code); + r300_vertex_program_dump(compiler); } } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c index 5800f1a78e..66e352d05d 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c @@ -20,6 +20,7 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +#include "radeon_compiler.h" #include "radeon_code.h" #include @@ -160,8 +161,9 @@ static void r300_vs_src_dump(uint32_t src) r300_vs_swiz_debug[(src >> 22) & 0x7]); } -void r300_vertex_program_dump(struct r300_vertex_program_code * vs) +void r300_vertex_program_dump(struct r300_vertex_program_compiler * c) { + struct r300_vertex_program_code * vs = c->code; unsigned instrcount = vs->length / 4; unsigned i; @@ -177,4 +179,21 @@ void r300_vertex_program_dump(struct r300_vertex_program_code * vs) r300_vs_src_dump(vs->body.d[offset+1+src]); } } + + fprintf(stderr, "Flow Control Ops: 0x%08x\n",vs->fc_ops); + for(i = 0; i < vs->num_fc_ops; i++) { + switch((vs->fc_ops >> (i * 2)) & 0x3 ) { + case 0: fprintf(stderr, "NOP"); break; + case 1: fprintf(stderr, "JUMP"); break; + case 2: fprintf(stderr, "LOOP"); break; + case 3: fprintf(stderr, "JSR"); break; + } + if (c->Base.is_r500) { + fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x\n", + vs->fc_op_addrs.r500[i].uw, + vs->fc_op_addrs.r500[i].lw); + } else { + fprintf(stderr,": 0x%08x\n", vs->fc_op_addrs.r300[i]); + } + } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index e14a3520dd..896246d203 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -243,6 +243,12 @@ struct rX00_fragment_program_code { #define R500_VS_MAX_ALU 1024 #define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4) #define R300_VS_MAX_TEMPS 32 +/* This is the max for all chipsets (r300-r500) */ +#define R300_VS_MAX_FC_OPS 16 +/* The r500 maximum depth is not just for loops, but any combination of loops + * and subroutine jumps. 
*/ +#define R500_VS_MAX_FC_DEPTH 8 +#define R300_VS_MAX_LOOP_DEPTH 1 #define VSF_MAX_INPUTS 32 #define VSF_MAX_OUTPUTS 32 @@ -263,9 +269,18 @@ struct r300_vertex_program_code { uint32_t InputsRead; uint32_t OutputsWritten; -}; -void r300_vertex_program_dump(struct r300_vertex_program_code * vs); + unsigned int num_fc_ops; + uint32_t fc_ops; + union { + uint32_t r300[R300_VS_MAX_FC_OPS]; + struct { + uint32_t lw; + uint32_t uw; + } r500[R300_VS_MAX_FC_OPS]; + } fc_op_addrs; + int32_t fc_loop_index[R300_VS_MAX_FC_OPS]; +}; #endif /* RADEON_CODE_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index f15905d79d..bbd57cca63 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -113,5 +113,6 @@ struct r300_vertex_program_compiler { }; void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c); +void r300_vertex_program_dump(struct r300_vertex_program_compiler * c); #endif /* RADEON_COMPILER_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c index 2a3306f906..32d4b45dd6 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c @@ -423,7 +423,8 @@ static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop, * @param inst A pointer to a BGNLOOP instruction. 
* @return 1 for success, 0 for failure */ -int transform_loop(struct emulate_loop_state * s, struct rc_instruction * inst) +static int transform_loop(struct emulate_loop_state * s, + struct rc_instruction * inst) { struct loop_info * loop; @@ -435,7 +436,7 @@ int transform_loop(struct emulate_loop_state * s, struct rc_instruction * inst) if (!build_loop_info(s->C, loop, inst)) return 0; - if(try_unroll_loop(s->C, loop, -1)){ + if(try_unroll_loop(s->C, loop, s->prog_inst_limit)){ return 1; } @@ -472,12 +473,13 @@ int transform_loop(struct emulate_loop_state * s, struct rc_instruction * inst) } void rc_transform_loops(struct radeon_compiler *c, - struct emulate_loop_state * s) + struct emulate_loop_state * s, int prog_inst_limit) { struct rc_instruction * ptr; memset(s, 0, sizeof(struct emulate_loop_state)); s->C = c; + s->prog_inst_limit = prog_inst_limit; for(ptr = s->C->Program.Instructions.Next; ptr != &s->C->Program.Instructions; ptr = ptr->Next) { if(ptr->Type == RC_INSTRUCTION_NORMAL && diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h index 86d91ef14b..bba1f68e30 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h @@ -21,10 +21,11 @@ struct emulate_loop_state { struct loop_info * Loops; unsigned int LoopCount; unsigned int LoopReserved; + int prog_inst_limit; }; void rc_transform_loops(struct radeon_compiler *c, - struct emulate_loop_state * s); + struct emulate_loop_state * s, int prog_inst_limit); void rc_unroll_loops(struct radeon_compiler * c, int prog_inst_limit); diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index f25264b6f2..f7705b0f6f 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -441,6 +441,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228 #define R300_VAP_GB_HORZ_DISC_ADJ 0x222c +#define R300_VAP_PVS_FLOW_CNTL_ADDRS_0 0x2230 +#define R300_PVS_FC_ACT_ADRS(x) ((x) << 0) +#define R300_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 8) +#define R300_PVS_FC_LAST_INST(x) ((x) << 16) +#define R300_PVS_FC_RTN_INST(x) ((x) << 24) + /* gap */ /* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between @@ -459,6 +465,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_2288_R300 0x00750000 /* -- nh */ # define R300_2288_RV350 0x0000FFFF /* -- Vladimir */ +#define R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0 0x2290 +#define R300_PVS_FC_LOOP_INIT_VAL(x) ((x) << 0) +#define R300_PVS_FC_LOOP_STEP_VAL(x) ((x) << 8) + /* gap */ /* Addresses are relative to the vertex program instruction area of the @@ -489,6 +499,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_VAP_PVS_CODE_CNTL_1 0x22D8 # define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0 #define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC +#define R300_VAP_PVS_FC_OPC_JUMP(x) (1 << (2 * (x))) +#define R300_VAP_PVS_FC_OPC_LOOP(x) (2 << (2 * (x))) +#define R300_VAP_PVS_FC_OPC_JSR(x) (3 << (2 * (x))) /* The entire range from 0x2300 to 0x2AC inclusive seems to be used for * immediate vertices @@ -505,6 +518,14 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* write 0 to indicate end of packet? */ #define R300_VAP_VTX_END_OF_PKT 0x24AC +#define R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0 0x2500 +#define R500_PVS_FC_ACT_ADRS(x) ((x) << 0) +#define R500_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 16) + +#define R500_VAP_PVS_FLOW_CNTL_ADDRS_UW_0 0x2504 +#define R500_PVS_FC_LAST_INST(x) ((x) << 0) +#define R500_PVS_FC_RTN_INST(x) ((x) << 16) + /* gap */ /* These are values from r300_reg/r300_reg.h - they are known to be correct -- cgit v1.2.3 From 34cdad62dd26ce2e2d59df1c4b82bb301894b762 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Tue, 10 Aug 2010 17:39:13 -0700 Subject: r300g: Remove unnecessary header. 
--- src/gallium/drivers/r300/r300_state_derived.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 693b1e29f2..1f36e7758f 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -35,7 +35,6 @@ #include "r300_state_inlines.h" #include "r300_texture.h" #include "r300_vs.h" -#include "r300_winsys.h" /* r300_state_derived: Various bits of state which are dependent upon * currently bound CSO data. */ -- cgit v1.2.3 From 61205f6189bfbd410f68c3bcfd645a4e29e03b4a Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Tue, 10 Aug 2010 17:53:08 -0700 Subject: svga: Remove unnecessary headers. --- src/gallium/winsys/svga/drm/vmw_screen_dri.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/winsys/svga/drm/vmw_screen_dri.c b/src/gallium/winsys/svga/drm/vmw_screen_dri.c index 1b0d10f60d..7bd4407e9f 100644 --- a/src/gallium/winsys/svga/drm/vmw_screen_dri.c +++ b/src/gallium/winsys/svga/drm/vmw_screen_dri.c @@ -32,8 +32,6 @@ #include "vmw_screen.h" #include "vmw_surface.h" -#include "vmw_fence.h" -#include "vmw_context.h" #include "svga_drm_public.h" #include "state_tracker/drm_driver.h" -- cgit v1.2.3 From 683ef52e19576f6e1263bc7d25fc9475c519eade Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 10 Aug 2010 14:21:05 +0200 Subject: r300g: implement gl_FrontFacing --- src/gallium/drivers/r300/r300_fs.c | 15 +++++++- src/gallium/drivers/r300/r300_shader_semantics.h | 2 + src/gallium/drivers/r300/r300_state_derived.c | 43 +++++++++++++++++++--- .../drivers/dri/r300/compiler/radeon_compiler.c | 43 ++++++++++++++++++++++ .../drivers/dri/r300/compiler/radeon_compiler.h | 1 + 5 files changed, 97 insertions(+), 7 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 5c905c1159..2a0c30620a 
100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -72,6 +72,11 @@ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, fs_inputs->wpos = i; break; + case TGSI_SEMANTIC_FACE: + assert(index == 0); + fs_inputs->face = i; + break; + default: fprintf(stderr, "r300: FP: Unknown input semantic: %i\n", info->input_semantic_name[i]); @@ -120,6 +125,9 @@ static void allocate_hardware_inputs( allocate(mydata, inputs->color[i], reg++); } } + if (inputs->face != ATTR_UNUSED) { + allocate(mydata, inputs->face, reg++); + } for (i = 0; i < ATTR_GENERIC_COUNT; i++) { if (inputs->generic[i] != ATTR_UNUSED) { allocate(mydata, inputs->generic[i], reg++); @@ -360,13 +368,14 @@ static void r300_translate_fragment_shader( { struct r300_fragment_program_compiler compiler; struct tgsi_to_rc ttr; - int wpos; + int wpos, face; unsigned i; tgsi_scan_shader(tokens, &shader->info); r300_shader_read_fs_inputs(&shader->info, &shader->inputs); wpos = shader->inputs.wpos; + face = shader->inputs.face; /* Setup the compiler. 
*/ memset(&compiler, 0, sizeof(compiler)); @@ -406,6 +415,10 @@ static void r300_translate_fragment_shader( rc_transform_fragment_wpos(&compiler.Base, wpos, wpos, TRUE); } + if (face != ATTR_UNUSED) { + rc_transform_fragment_face(&compiler.Base, face); + } + /* Invoke the compiler */ r3xx_compile_fragment_program(&compiler); diff --git a/src/gallium/drivers/r300/r300_shader_semantics.h b/src/gallium/drivers/r300/r300_shader_semantics.h index cb7a37033f..4be23e64ce 100644 --- a/src/gallium/drivers/r300/r300_shader_semantics.h +++ b/src/gallium/drivers/r300/r300_shader_semantics.h @@ -38,6 +38,7 @@ struct r300_shader_semantics { int psize; int color[ATTR_COLOR_COUNT]; int bcolor[ATTR_COLOR_COUNT]; + int face; int generic[ATTR_GENERIC_COUNT]; int fog; int wpos; @@ -50,6 +51,7 @@ static INLINE void r300_shader_semantics_reset( info->pos = ATTR_UNUSED; info->psize = ATTR_UNUSED; + info->face = ATTR_UNUSED; info->fog = ATTR_UNUSED; info->wpos = ATTR_UNUSED; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 1f36e7758f..39000477cb 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -46,6 +46,11 @@ enum r300_rs_swizzle { SWIZ_0001, }; +enum r300_rs_col_write_type { + WRITE_COLOR = 0, + WRITE_FACE +}; + static void r300_draw_emit_attrib(struct r300_context* r300, enum attrib_emit emit, enum interp_mode interp, @@ -203,8 +208,10 @@ static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr, rs->inst[id] |= R300_RS_INST_COL_ID(id); } -static void r300_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset) +static void r300_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset, + enum r300_rs_col_write_type type) { + assert(type != WRITE_COLOR); rs->inst[id] |= R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_offset); } @@ -252,10 +259,16 @@ static void r500_rs_col(struct r300_rs_block* rs, int id, int ptr, rs->inst[id] |= 
R500_RS_INST_COL_ID(id); } -static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset) +static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset, + enum r300_rs_col_write_type type) { - rs->inst[id] |= R500_RS_INST_COL_CN_WRITE | - R500_RS_INST_COL_ADDR(fp_offset); + if (type == WRITE_FACE) + rs->inst[id] |= R500_RS_INST_COL_CN_WRITE_BACKFACE | + R500_RS_INST_COL_ADDR(fp_offset); + else + rs->inst[id] |= R500_RS_INST_COL_CN_WRITE | + R500_RS_INST_COL_ADDR(fp_offset); + } static void r500_rs_tex(struct r300_rs_block* rs, int id, int ptr, @@ -305,7 +318,7 @@ static void r300_update_rs_block(struct r300_context *r300) struct r300_rs_block rs = {0}; int i, col_count = 0, tex_count = 0, fp_offset = 0, count, loc = 0, tex_ptr = 0; void (*rX00_rs_col)(struct r300_rs_block*, int, int, enum r300_rs_swizzle); - void (*rX00_rs_col_write)(struct r300_rs_block*, int, int); + void (*rX00_rs_col_write)(struct r300_rs_block*, int, int, enum r300_rs_col_write_type); void (*rX00_rs_tex)(struct r300_rs_block*, int, int, enum r300_rs_swizzle); void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int); boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || @@ -350,7 +363,7 @@ static void r300_update_rs_block(struct r300_context *r300) /* Write it to the FS input register if it's needed by the FS. */ if (fs_inputs->color[i] != ATTR_UNUSED) { - rX00_rs_col_write(&rs, col_count, fp_offset); + rX00_rs_col_write(&rs, col_count, fp_offset, WRITE_COLOR); fp_offset++; DBG(r300, DBG_RS, @@ -398,6 +411,24 @@ static void r300_update_rs_block(struct r300_context *r300) } } + /* gl_FrontFacing. + * Note that we can use either the two-sided color selection based on + * the front and back vertex shader colors, or gl_FrontFacing, + * but not both! It locks up otherwise. + * + * In Direct3D 9, the two-sided color selection can be used + * with shaders 2.0 only, while gl_FrontFacing can be used + * with shaders 3.0 only. 
The hardware apparently hasn't been designed + * to support both at the same time. */ + if (r300->screen->caps.is_r500 && fs_inputs->face != ATTR_UNUSED && + !(any_bcolor_used && r300->two_sided_color)) { + rX00_rs_col(&rs, col_count, col_count, SWIZ_XYZW); + rX00_rs_col_write(&rs, col_count, fp_offset, WRITE_FACE); + fp_offset++; + col_count++; + DBG(r300, DBG_RS, "r300: Rasterized FACE written to FS.\n"); + } + /* Rasterize texture coordinates. */ for (i = 0; i < ATTR_GENERIC_COUNT && tex_count < 8; i++) { bool sprite_coord = !!(r300->sprite_coord_enable & (1 << i)); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c index 1c8ba864a4..935dc9b0a8 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -307,3 +307,46 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig } } + +/** + * The FACE input in hardware contains 1 if it's a back face, 0 otherwise. + * Gallium and OpenGL define it the other way around. + * + * So let's just negate FACE at the beginning of the shader and rewrite the rest + * of the shader to read from the newly allocated temporary. 
+ */ +void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face) +{ + unsigned tempregi = rc_find_free_temporary(c); + struct rc_instruction *inst_add; + struct rc_instruction *inst; + + /* temp.x = 1 - FACE (invert the hardware's back-face flag) */ + inst_add = rc_insert_new_instruction(c, &c->Program.Instructions); + inst_add->U.I.Opcode = RC_OPCODE_ADD; + + inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_add->U.I.DstReg.Index = tempregi; + inst_add->U.I.DstReg.WriteMask = RC_MASK_X; + + inst_add->U.I.SrcReg[0].File = RC_FILE_NONE; + inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; + + inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT; + inst_add->U.I.SrcReg[1].Index = face; + inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; + inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW; + + for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + for(i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && + inst->U.I.SrcReg[i].Index == face) { + inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[i].Index = tempregi; + } + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index e15291dd19..7c42eb3ae5 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -81,6 +81,7 @@ void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_ou void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output); void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, int full_vtransform); +void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face); struct r300_fragment_program_compiler { struct radeon_compiler Base; -- cgit v1.2.3 From ca5227ce8b749fa3f00438c41066def6e0a8dbe4 Mon Sep 17 00:00:00 2001
From: Marek Olšák Date: Wed, 11 Aug 2010 02:58:50 +0200 Subject: r300g: initialize VAP_VTX_STATE_CNTL This got lost during the rasterizer rewrite. --- src/gallium/drivers/r300/r300_state_derived.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 39000477cb..c047a127ba 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -337,6 +337,11 @@ static void r300_update_rs_block(struct r300_context *r300) rX00_rs_tex_write = r300_rs_tex_write; } + /* 0x5555 copied from classic, which means: + * Select user color 0 for COLOR0 up to COLOR7. + * What the hell does that mean? */ + rs.vap_vtx_state_cntl = 0x5555; + /* The position is always present in VAP. */ rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_POS; rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; -- cgit v1.2.3 From 0dcf0f9dfaa23b08d2bc20f8cbd02550c2632e52 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 11 Aug 2010 10:46:12 +0200 Subject: auxiliary: move Ben Skeggs' primitive splitter to common code This is a simple framework that handles splitting primitives in an abstract way. The user has to specify the primitive start, start index and count. Then, it can ask the primitive splitter to "draw" a chunk of the primitive, staying under a given vertex/index budget. The primitive splitter will then call user-supplied functions to emit a range of vertices/indices, as well as switch the edgeflag on or off. This is particularly useful for hardware that either has limits on the vertex count field, or where vertices are pushed on a FIFO or temporary buffer of limited size. Note that unlike other splitters, it does not manipulate data in any way, and merely asks a callback to do so, in vertex intervals. 
--- src/gallium/auxiliary/util/u_split_prim.h | 102 +++++++++++++++++++++++++++++ src/gallium/drivers/nouveau/nouveau_util.h | 100 ---------------------------- src/gallium/drivers/nv50/nv50_push.c | 2 +- src/gallium/drivers/nv50/nv50_vbo.c | 2 +- 4 files changed, 104 insertions(+), 102 deletions(-) create mode 100644 src/gallium/auxiliary/util/u_split_prim.h (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_split_prim.h b/src/gallium/auxiliary/util/u_split_prim.h new file mode 100644 index 0000000000..3c438da0ba --- /dev/null +++ b/src/gallium/auxiliary/util/u_split_prim.h @@ -0,0 +1,102 @@ +/* Originally written by Ben Skeggs for the nv50 driver*/ +#include + +struct u_split_prim { + void *priv; + void (*emit)(void *priv, unsigned start, unsigned count); + void (*edge)(void *priv, boolean enabled); + + unsigned mode; + unsigned start; + unsigned p_start; + unsigned p_end; + + uint repeat_first:1; + uint close_first:1; + uint edgeflag_off:1; +}; + +static INLINE void +u_split_prim_init(struct u_split_prim *s, + unsigned mode, unsigned start, unsigned count) +{ + if (mode == PIPE_PRIM_LINE_LOOP) { + s->mode = PIPE_PRIM_LINE_STRIP; + s->close_first = 1; + } else { + s->mode = mode; + s->close_first = 0; + } + s->start = start; + s->p_start = start; + s->p_end = start + count; + s->edgeflag_off = 0; + s->repeat_first = 0; +} + +static INLINE boolean +u_split_prim_next(struct u_split_prim *s, unsigned max_verts) +{ + int repeat = 0; + + if (s->repeat_first) { + s->emit(s->priv, s->start, 1); + max_verts--; + if (s->edgeflag_off) { + s->edge(s->priv, TRUE); + s->edgeflag_off = FALSE; + } + } + + if (s->p_start + s->close_first + max_verts >= s->p_end) { + s->emit(s->priv, s->p_start, s->p_end - s->p_start); + if (s->close_first) + s->emit(s->priv, s->start, 1); + return TRUE; + } + + switch (s->mode) { + case PIPE_PRIM_LINES: + max_verts &= ~1; + break; + case PIPE_PRIM_LINE_STRIP: + repeat = 1; + break; + case PIPE_PRIM_POLYGON: + max_verts--; + 
s->emit(s->priv, s->p_start, max_verts); + s->edge(s->priv, FALSE); + s->emit(s->priv, s->p_start + max_verts, 1); + s->p_start += max_verts; + s->repeat_first = TRUE; + s->edgeflag_off = TRUE; + return FALSE; + case PIPE_PRIM_TRIANGLES: + max_verts = max_verts - (max_verts % 3); + break; + case PIPE_PRIM_TRIANGLE_STRIP: + /* to ensure winding stays correct, always split + * on an even number of generated triangles + */ + max_verts = max_verts & ~1; + repeat = 2; + break; + case PIPE_PRIM_TRIANGLE_FAN: + s->repeat_first = TRUE; + repeat = 1; + break; + case PIPE_PRIM_QUADS: + max_verts &= ~3; + break; + case PIPE_PRIM_QUAD_STRIP: + max_verts &= ~1; + repeat = 2; + break; + default: + break; + } + + s->emit (s->priv, s->p_start, max_verts); + s->p_start += (max_verts - repeat); + return FALSE; +} diff --git a/src/gallium/drivers/nouveau/nouveau_util.h b/src/gallium/drivers/nouveau/nouveau_util.h index a5e8537533..b165f7a611 100644 --- a/src/gallium/drivers/nouveau/nouveau_util.h +++ b/src/gallium/drivers/nouveau/nouveau_util.h @@ -88,104 +88,4 @@ static INLINE unsigned log2i(unsigned i) return r; } -struct u_split_prim { - void *priv; - void (*emit)(void *priv, unsigned start, unsigned count); - void (*edge)(void *priv, boolean enabled); - - unsigned mode; - unsigned start; - unsigned p_start; - unsigned p_end; - - uint repeat_first:1; - uint close_first:1; - uint edgeflag_off:1; -}; - -static INLINE void -u_split_prim_init(struct u_split_prim *s, - unsigned mode, unsigned start, unsigned count) -{ - if (mode == PIPE_PRIM_LINE_LOOP) { - s->mode = PIPE_PRIM_LINE_STRIP; - s->close_first = 1; - } else { - s->mode = mode; - s->close_first = 0; - } - s->start = start; - s->p_start = start; - s->p_end = start + count; - s->edgeflag_off = 0; - s->repeat_first = 0; -} - -static INLINE boolean -u_split_prim_next(struct u_split_prim *s, unsigned max_verts) -{ - int repeat = 0; - - if (s->repeat_first) { - s->emit(s->priv, s->start, 1); - max_verts--; - if (s->edgeflag_off) { 
- s->edge(s->priv, TRUE); - s->edgeflag_off = FALSE; - } - } - - if (s->p_start + s->close_first + max_verts >= s->p_end) { - s->emit(s->priv, s->p_start, s->p_end - s->p_start); - if (s->close_first) - s->emit(s->priv, s->start, 1); - return TRUE; - } - - switch (s->mode) { - case PIPE_PRIM_LINES: - max_verts &= ~1; - break; - case PIPE_PRIM_LINE_STRIP: - repeat = 1; - break; - case PIPE_PRIM_POLYGON: - max_verts--; - s->emit(s->priv, s->p_start, max_verts); - s->edge(s->priv, FALSE); - s->emit(s->priv, s->p_start + max_verts, 1); - s->p_start += max_verts; - s->repeat_first = TRUE; - s->edgeflag_off = TRUE; - return FALSE; - case PIPE_PRIM_TRIANGLES: - max_verts = max_verts - (max_verts % 3); - break; - case PIPE_PRIM_TRIANGLE_STRIP: - /* to ensure winding stays correct, always split - * on an even number of generated triangles - */ - max_verts = max_verts & ~1; - repeat = 2; - break; - case PIPE_PRIM_TRIANGLE_FAN: - s->repeat_first = TRUE; - repeat = 1; - break; - case PIPE_PRIM_QUADS: - max_verts &= ~3; - break; - case PIPE_PRIM_QUAD_STRIP: - max_verts &= ~1; - repeat = 2; - break; - default: - break; - } - - s->emit (s->priv, s->p_start, max_verts); - s->p_start += (max_verts - repeat); - return FALSE; -} - #endif diff --git a/src/gallium/drivers/nv50/nv50_push.c b/src/gallium/drivers/nv50/nv50_push.c index c3ac804146..ee87144dcf 100644 --- a/src/gallium/drivers/nv50/nv50_push.c +++ b/src/gallium/drivers/nv50/nv50_push.c @@ -2,8 +2,8 @@ #include "pipe/p_state.h" #include "util/u_inlines.h" #include "util/u_format.h" +#include "util/u_split_prim.h" -#include "nouveau/nouveau_util.h" #include "nv50_context.h" #include "nv50_resource.h" diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index e7f8fe33ed..0937668b84 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -24,8 +24,8 @@ #include "pipe/p_state.h" #include "util/u_inlines.h" #include "util/u_format.h" +#include 
"util/u_split_prim.h" -#include "nouveau/nouveau_util.h" #include "nv50_context.h" #include "nv50_resource.h" -- cgit v1.2.3 From eee5cea385b6871fa934a7882b2f214e3cbace8b Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 11 Aug 2010 10:51:28 +0200 Subject: auxiliary: fix u_split_prim naming convention Current practice is to start identifiers with "util_" instead of "u_". --- src/gallium/auxiliary/util/u_split_prim.h | 6 +++--- src/gallium/drivers/nv50/nv50_push.c | 6 +++--- src/gallium/drivers/nv50/nv50_vbo.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_split_prim.h b/src/gallium/auxiliary/util/u_split_prim.h index 3c438da0ba..e526a73fc3 100644 --- a/src/gallium/auxiliary/util/u_split_prim.h +++ b/src/gallium/auxiliary/util/u_split_prim.h @@ -1,7 +1,7 @@ /* Originally written by Ben Skeggs for the nv50 driver*/ #include -struct u_split_prim { +struct util_split_prim { void *priv; void (*emit)(void *priv, unsigned start, unsigned count); void (*edge)(void *priv, boolean enabled); @@ -17,7 +17,7 @@ struct u_split_prim { }; static INLINE void -u_split_prim_init(struct u_split_prim *s, +util_split_prim_init(struct util_split_prim *s, unsigned mode, unsigned start, unsigned count) { if (mode == PIPE_PRIM_LINE_LOOP) { @@ -35,7 +35,7 @@ u_split_prim_init(struct u_split_prim *s, } static INLINE boolean -u_split_prim_next(struct u_split_prim *s, unsigned max_verts) +util_split_prim_next(struct util_split_prim *s, unsigned max_verts) { int repeat = 0; diff --git a/src/gallium/drivers/nv50/nv50_push.c b/src/gallium/drivers/nv50/nv50_push.c index ee87144dcf..6a2ffd5a3c 100644 --- a/src/gallium/drivers/nv50/nv50_push.c +++ b/src/gallium/drivers/nv50/nv50_push.c @@ -217,7 +217,7 @@ nv50_push_elements_instanced(struct pipe_context *pipe, 4; /* potential edgeflag enable/disable */ const unsigned v_overhead = 1 + /* VERTEX_DATA packet header */ 2; /* potential edgeflag modification */ - struct 
u_split_prim s; + struct util_split_prim s; unsigned vtx_size; boolean nzi = FALSE; int i; @@ -335,7 +335,7 @@ nv50_push_elements_instanced(struct pipe_context *pipe, ctx.attr[i].map = (uint8_t *)ctx.attr[i].map + ctx.attr[i].stride; } - u_split_prim_init(&s, mode, start, count); + util_split_prim_init(&s, mode, start, count); do { if (AVAIL_RING(chan) < p_overhead + (6 * vtx_size)) { FIRE_RING(chan); @@ -351,7 +351,7 @@ nv50_push_elements_instanced(struct pipe_context *pipe, BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); OUT_RING (chan, nv50_prim(s.mode) | (nzi ? (1 << 28) : 0)); - done = u_split_prim_next(&s, max_verts); + done = util_split_prim_next(&s, max_verts); BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); } while (!done); diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 0937668b84..1f11950199 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -311,7 +311,7 @@ nv50_draw_elements_inline(struct pipe_context *pipe, struct pipe_transfer *transfer; struct instance a[16]; struct inline_ctx ctx; - struct u_split_prim s; + struct util_split_prim s; boolean nzi = FALSE; unsigned overhead; @@ -347,7 +347,7 @@ nv50_draw_elements_inline(struct pipe_context *pipe, unsigned max_verts; boolean done; - u_split_prim_init(&s, mode, start, count); + util_split_prim_init(&s, mode, start, count); do { if (AVAIL_RING(chan) < (overhead + 6)) { FIRE_RING(chan); @@ -366,7 +366,7 @@ nv50_draw_elements_inline(struct pipe_context *pipe, BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); OUT_RING (chan, nv50_prim(s.mode) | (nzi ? 
(1<<28) : 0)); - done = u_split_prim_next(&s, max_verts); + done = util_split_prim_next(&s, max_verts); BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); } while (!done); -- cgit v1.2.3 From 58b104d7f0890434aadbdebcd6002ba0a0e132ec Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 11 Aug 2010 10:54:31 +0200 Subject: auxiliary: make primitive splitter assert on unimplemented adjacency prims They are unimplemented, even though the framework makes it possible to implement them well, and nv50 needs them. --- src/gallium/auxiliary/util/u_split_prim.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_split_prim.h b/src/gallium/auxiliary/util/u_split_prim.h index e526a73fc3..206e1ec311 100644 --- a/src/gallium/auxiliary/util/u_split_prim.h +++ b/src/gallium/auxiliary/util/u_split_prim.h @@ -92,8 +92,11 @@ util_split_prim_next(struct util_split_prim *s, unsigned max_verts) max_verts &= ~1; repeat = 2; break; - default: + case PIPE_PRIM_POINTS: break; + default: + /* TODO: implement adjacency primitives */ + assert(0); } s->emit (s->priv, s->p_start, max_verts); -- cgit v1.2.3 From 24f5ebb1d735ca7e8944b346359de5941e707047 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Fri, 6 Aug 2010 09:53:15 +0200 Subject: u_surfaces: fix surface leak due to off by one --- src/gallium/auxiliary/util/u_surfaces.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_surfaces.c b/src/gallium/auxiliary/util/u_surfaces.c index b5d21570d5..f6e5801ef6 100644 --- a/src/gallium/auxiliary/util/u_surfaces.c +++ b/src/gallium/auxiliary/util/u_surfaces.c @@ -99,7 +99,7 @@ util_surfaces_destroy(struct util_surfaces *us, struct pipe_resource *pt, void ( if(us->u.array) { unsigned i; - for(i = 0; i < pt->last_level; ++i) + for(i = 0; i <= pt->last_level; ++i) { struct pipe_surface *ps = us->u.array[i]; if(ps) -- cgit v1.2.3 From 
e45b2ce2c3a52e9f8e6fb7e933ab2f95eec15be1 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Fri, 6 Aug 2010 09:46:14 +0200 Subject: u_surfaces: use cso_hash instead of util_hash_table Using cso_hash directly is the right thing since util_hash_table adds useless overhead and is harder to use for this application. --- src/gallium/auxiliary/util/u_surfaces.c | 79 ++++++++++++--------------------- src/gallium/auxiliary/util/u_surfaces.h | 5 +-- 2 files changed, 31 insertions(+), 53 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_surfaces.c b/src/gallium/auxiliary/util/u_surfaces.c index f6e5801ef6..7733ad24d0 100644 --- a/src/gallium/auxiliary/util/u_surfaces.c +++ b/src/gallium/auxiliary/util/u_surfaces.c @@ -3,40 +3,22 @@ #include "util/u_inlines.h" #include "util/u_memory.h" -/* TODO: ouch, util_hash_table should do these by default when passed a null function pointer - * this indirect function call is quite bad - */ -static unsigned -hash(void *key) -{ - return (unsigned)(uintptr_t)key; -} - -static int -compare(void *key1, void *key2) -{ - return (unsigned)(uintptr_t)key1 - (unsigned)(uintptr_t)key2; -} - struct pipe_surface * util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size, struct pipe_screen *pscreen, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags) { struct pipe_surface *ps; - void *key = NULL; if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE) - { /* or 2D array */ - if(!us->u.table) - us->u.table = util_hash_table_create(hash, compare); - key = (void *)(uintptr_t)(((zslice + face) << 8) | level); - /* TODO: ouch, should have a get-reference function... - * also, shouldn't allocate a two-pointer structure for each item... 
*/ - ps = util_hash_table_get(us->u.table, key); + { /* or 2D array */ + if(!us->u.hash) + us->u.hash = cso_hash_create(); + + ps = cso_hash_iter_data(cso_hash_find(us->u.hash, ((zslice + face) << 8) | level)); } else { if(!us->u.array) - us->u.array = CALLOC(pt->last_level + 1, sizeof(struct pipe_surface *)); + us->u.array = CALLOC(pt->last_level + 1, sizeof(struct pipe_surface *)); ps = us->u.array[level]; } @@ -54,7 +36,7 @@ util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size, str ps->offset = ~0; if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE) - util_hash_table_set(us->u.table, key, ps); + cso_hash_insert(us->u.hash, ((zslice + face) << 8) | level, ps); else us->u.array[level] = ps; @@ -66,47 +48,44 @@ util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps) { struct pipe_resource *pt = ps->texture; if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE) - { /* or 2D array */ - void* key = (void*)(uintptr_t)(((ps->zslice + ps->face) << 8) | ps->level); - util_hash_table_remove(us->u.table, key); + { /* or 2D array */ + cso_hash_erase(us->u.hash, cso_hash_find(us->u.hash, ((ps->zslice + ps->face) << 8) | ps->level)); } else us->u.array[ps->level] = 0; } -static enum pipe_error -util_surfaces_destroy_callback(void *key, void *value, void *data) -{ - void (*destroy_surface) (struct pipe_surface * ps) = data; - destroy_surface((struct pipe_surface *)value); - return PIPE_OK; -} - void util_surfaces_destroy(struct util_surfaces *us, struct pipe_resource *pt, void (*destroy_surface) (struct pipe_surface *)) { if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE) - { /* or 2D array */ - if(us->u.table) + { /* or 2D array */ + if(us->u.hash) { - util_hash_table_foreach(us->u.table, util_surfaces_destroy_callback, destroy_surface); - util_hash_table_destroy(us->u.table); - us->u.table = NULL; + struct cso_hash_iter iter; + iter = cso_hash_first_node(us->u.hash); + while 
(!cso_hash_iter_is_null(iter)) { + destroy_surface(cso_hash_iter_data(iter)); + iter = cso_hash_iter_next(iter); + } + + cso_hash_delete(us->u.hash); + us->u.hash = NULL; } } else { if(us->u.array) { - unsigned i; - for(i = 0; i <= pt->last_level; ++i) - { - struct pipe_surface *ps = us->u.array[i]; - if(ps) - destroy_surface(ps); - } - FREE(us->u.array); - us->u.array = NULL; + unsigned i; + for(i = 0; i <= pt->last_level; ++i) + { + struct pipe_surface *ps = us->u.array[i]; + if(ps) + destroy_surface(ps); + } + FREE(us->u.array); + us->u.array = NULL; } } } diff --git a/src/gallium/auxiliary/util/u_surfaces.h b/src/gallium/auxiliary/util/u_surfaces.h index 0195bf5afb..79ecd31bd7 100644 --- a/src/gallium/auxiliary/util/u_surfaces.h +++ b/src/gallium/auxiliary/util/u_surfaces.h @@ -4,14 +4,13 @@ #include "pipe/p_compiler.h" #include "pipe/p_state.h" #include "util/u_atomic.h" - -struct util_hash_table; +#include "cso_cache/cso_hash.h" struct util_surfaces { union { - struct util_hash_table *table; + struct cso_hash *hash; struct pipe_surface **array; } u; }; -- cgit v1.2.3 From 5668526c915a1e31036386d117c536592dae6859 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Tue, 3 Aug 2010 22:19:30 +0200 Subject: u_surfaces: add util_surfaces_peek Used to find out if a surface exists without creating one. 
--- src/gallium/auxiliary/util/u_surfaces.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_surfaces.h b/src/gallium/auxiliary/util/u_surfaces.h index 79ecd31bd7..af978c7057 100644 --- a/src/gallium/auxiliary/util/u_surfaces.h +++ b/src/gallium/auxiliary/util/u_surfaces.h @@ -12,6 +12,7 @@ struct util_surfaces { struct cso_hash *hash; struct pipe_surface **array; + void* pv; } u; }; @@ -34,6 +35,18 @@ util_surfaces_get(struct util_surfaces *us, unsigned surface_struct_size, struct return util_surfaces_do_get(us, surface_struct_size, pscreen, pt, face, level, zslice, flags); } +static INLINE struct pipe_surface * +util_surfaces_peek(struct util_surfaces *us, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice) +{ + if(!us->u.pv) + return 0; + + if(unlikely(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)) + return cso_hash_iter_data(cso_hash_find(us->u.hash, ((zslice + face) << 8) | level)); + else + return us->u.array[level]; +} + void util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps); static INLINE void -- cgit v1.2.3 From 6f3c4819ecf397cf9efee0213e628e15c1bc704d Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Tue, 10 Aug 2010 10:26:14 +0200 Subject: gallium: add ALWAYS_INLINE Used when we want to be sure the compiler inlines a large function into an inner loop. 
--- src/gallium/include/pipe/p_compiler.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index 0358c14e24..1fa3ec8300 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -102,6 +102,16 @@ typedef unsigned char boolean; # endif #endif +/* Forced function inlining */ +#ifndef ALWAYS_INLINE +# ifdef __GNUC__ +# define ALWAYS_INLINE inline __attribute__((always_inline)) +# elif defined(_MSC_VER) +# define ALWAYS_INLINE __forceinline +# else +# define ALWAYS_INLINE INLINE +# endif +#endif /* Function visibility */ #ifndef PUBLIC -- cgit v1.2.3 From b85c71d4e1e4ed788be834dff5b7b3c0cd0402ac Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Tue, 3 Aug 2010 21:20:53 +0200 Subject: auxiliary: support for transfers using staging resources Direct3D 10/11 has no concept of transfers. Applications instead create resources with a STAGING or DYNAMIC usage, copy between them and the real resource and use Map to map the STAGING/DYNAMIC resource. This util module allows to implement Gallium drivers as a Direct3D driver would be implemented: transfers allocate a resource with PIPE_USAGE_STAGING, and copy the data between it and the real resource with resource_copy_region. 
--- src/gallium/auxiliary/Makefile | 1 + src/gallium/auxiliary/util/u_staging.c | 93 ++++++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_staging.h | 29 +++++++++++ 3 files changed, 123 insertions(+) create mode 100644 src/gallium/auxiliary/util/u_staging.c create mode 100644 src/gallium/auxiliary/util/u_staging.h (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 843b72bc38..9544e90a96 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -131,6 +131,7 @@ C_SOURCES = \ util/u_sampler.c \ util/u_simple_shaders.c \ util/u_snprintf.c \ + util/u_staging.c \ util/u_surface.c \ util/u_surfaces.c \ util/u_texture.c \ diff --git a/src/gallium/auxiliary/util/u_staging.c b/src/gallium/auxiliary/util/u_staging.c new file mode 100644 index 0000000000..4853aaf6d9 --- /dev/null +++ b/src/gallium/auxiliary/util/u_staging.c @@ -0,0 +1,93 @@ +#include "util/u_staging.h" +#include "pipe/p_context.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" + +static void +util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigned height, unsigned depth, struct pipe_resource *template) +{ + memset(template, 0, sizeof(struct pipe_resource)); + if(pt->target != PIPE_BUFFER && depth <= 1) + template->target = PIPE_TEXTURE_2D; + else + template->target = pt->target; + template->format = pt->format; + template->width0 = width; + template->height0 = height; + template->depth0 = depth; + template->last_level = 0; + template->nr_samples = pt->nr_samples; + template->bind = 0; + template->usage = PIPE_USAGE_STAGING; + template->flags = 0; +} + +inline struct util_staging_transfer * +util_staging_transfer_new(struct pipe_context *pipe, + struct pipe_resource *pt, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box, + bool direct) +{ + struct pipe_screen *pscreen = pipe->screen; + struct util_staging_transfer *tx; + struct pipe_resource 
staging_resource_template; + + tx = CALLOC_STRUCT(util_staging_transfer); + if (!tx) + return NULL; + + pipe_resource_reference(&tx->base.resource, pt); + tx->base.sr = sr; + tx->base.usage = usage; + tx->base.box = *box; + + if (direct) + { + tx->staging_resource = pt; + return tx; + } + + util_staging_resource_template(pt, box->width, box->height, box->depth, &staging_resource_template); + tx->staging_resource = pscreen->resource_create(pscreen, &staging_resource_template); + if (!tx->staging_resource) + { + pipe_resource_reference(&tx->base.resource, NULL); + FREE(tx); + return NULL; + } + + if (usage & PIPE_TRANSFER_READ) + { + struct pipe_subresource dstsr; + dstsr.face = 0; + dstsr.level = 0; + for(unsigned zi = 0; zi < box->depth; ++zi) + pipe->resource_copy_region(pipe, tx->staging_resource, dstsr, 0, 0, 0, tx->base.resource, sr, box->x, box->y, box->z + zi, box->width, box->height); + } + + return tx; +} + +void +util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx) +{ + struct util_staging_transfer *tx = (struct util_staging_transfer *)ptx; + + if (tx->staging_resource != tx->base.resource) + { + if(tx->base.usage & PIPE_TRANSFER_WRITE) { + struct pipe_subresource srcsr; + srcsr.face = 0; + srcsr.level = 0; + for(unsigned zi = 0; zi < tx->base.box.depth; ++zi) + pipe->resource_copy_region(pipe, tx->base.resource, tx->base.sr, tx->base.box.x, tx->base.box.y, tx->base.box.z + zi, tx->staging_resource, srcsr, 0, 0, 0, tx->base.box.width, tx->base.box.height); + } + + pipe_resource_reference(&tx->staging_resource, NULL); + } + + pipe_resource_reference(&ptx->resource, NULL); + FREE(ptx); +} diff --git a/src/gallium/auxiliary/util/u_staging.h b/src/gallium/auxiliary/util/u_staging.h new file mode 100644 index 0000000000..f5976dadb1 --- /dev/null +++ b/src/gallium/auxiliary/util/u_staging.h @@ -0,0 +1,29 @@ +/* Implement transfers using staging resources like in DirectX 10/11 */ + +#ifndef U_STAGING_H +#define U_STAGING_H + 
+#include "pipe/p_state.h" + +struct util_staging_transfer { + struct pipe_transfer base; + + /* if direct, same as base.resource, otherwise the temporary staging resource */ + struct pipe_resource *staging_resource; +}; + +/* user must set stride, slice_stride and offset */ +/* pt->usage == PIPE_USAGE_DYNAMIC should be a good value to pass for direct */ +/* staging resource is currently created with PIPE_USAGE_STAGING */ +struct util_staging_transfer * +util_staging_transfer_new(struct pipe_context *pipe, + struct pipe_resource *pt, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box, + bool direct); + +void +util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx); + +#endif -- cgit v1.2.3 From 4b20ad7559271a7785193094a3f110ef78e65253 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 11 Aug 2010 15:11:37 +0200 Subject: util: copy the u_staging commit message to the code --- src/gallium/auxiliary/util/u_staging.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_staging.h b/src/gallium/auxiliary/util/u_staging.h index f5976dadb1..602faa2971 100644 --- a/src/gallium/auxiliary/util/u_staging.h +++ b/src/gallium/auxiliary/util/u_staging.h @@ -1,4 +1,12 @@ -/* Implement transfers using staging resources like in DirectX 10/11 */ +/* Direct3D 10/11 has no concept of transfers. Applications instead + * create resources with a STAGING or DYNAMIC usage, copy between them + * and the real resource and use Map to map the STAGING/DYNAMIC resource. + * + * This util module allows to implement Gallium drivers as a Direct3D + * driver would be implemented: transfers allocate a resource with + * PIPE_USAGE_STAGING, and copy the data between it and the real resource + * with resource_copy_region.
+ */ #ifndef U_STAGING_H #define U_STAGING_H -- cgit v1.2.3 From 3deca2e284f0709e94bec5267febc7ccb34c17b8 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 11 Aug 2010 15:09:54 +0100 Subject: llvmpipe: Use single precision divide for one over area computation. --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 7e432503c1..44696c73b4 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -525,7 +525,7 @@ do_triangle_ccw(struct lp_setup_context *setup, info.dx20 = info.v2[0][0] - info.v0[0][0]; info.dy01 = info.v0[0][1] - info.v1[0][1]; info.dy20 = info.v2[0][1] - info.v0[0][1]; - info.oneoverarea = 1.0 / (info.dx01 * info.dy20 - info.dx20 * info.dy01); + info.oneoverarea = 1.0f / (info.dx01 * info.dy20 - info.dx20 * info.dy01); info.frontfacing = frontfacing; /* Setup parameter interpolants: -- cgit v1.2.3 From 66f57235d5e507b17f3cbf1329a803337ca08666 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 11 Aug 2010 15:10:28 +0100 Subject: llvmpipe: Debug code to dump interpolation coefficients. 
--- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 30 ++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 44696c73b4..20e63ae51f 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -260,13 +260,13 @@ static void setup_tri_coefficients( struct lp_setup_context *setup, { unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; unsigned slot; + unsigned i; /* setup interpolation for all the remaining attributes: */ for (slot = 0; slot < setup->fs.nr_inputs; slot++) { unsigned vert_attr = setup->fs.input[slot].src_index; unsigned usage_mask = setup->fs.input[slot].usage_mask; - unsigned i; switch (setup->fs.input[slot].interp) { case LP_INTERP_CONSTANT: @@ -316,6 +316,34 @@ static void setup_tri_coefficients( struct lp_setup_context *setup, /* The internal position input is in slot zero: */ setup_fragcoord_coef(tri, info, 0, fragcoord_usage_mask); + + if (0) { + for (i = 0; i < NUM_CHANNELS; i++) { + float a0 = tri->inputs.a0 [0][i]; + float dadx = tri->inputs.dadx[0][i]; + float dady = tri->inputs.dady[0][i]; + + debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n", + "xyzw"[i], + a0, dadx, dady); + } + + for (slot = 0; slot < setup->fs.nr_inputs; slot++) { + unsigned usage_mask = setup->fs.input[slot].usage_mask; + for (i = 0; i < NUM_CHANNELS; i++) { + if (usage_mask & (1 << i)) { + float a0 = tri->inputs.a0 [1 + slot][i]; + float dadx = tri->inputs.dadx[1 + slot][i]; + float dady = tri->inputs.dady[1 + slot][i]; + + debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n", + slot, + "xyzw"[i], + a0, dadx, dady); + } + } + } + } } -- cgit v1.2.3 From f8533482f4a9b5ee7107f4e653d4ebf99ac63e2e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 11 Aug 2010 15:11:12 +0100 Subject: gallivm: Use unsigned shift in lp_build_minify. 
Texture dimensions are unsigned. --- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 955d328953..665b010ece 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -811,7 +811,7 @@ lp_build_minify(struct lp_build_sample_context *bld, LLVMValueRef base_size, LLVMValueRef level) { - LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify"); + LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify"); size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one); return size; } -- cgit v1.2.3 From 10ce6779e8a64c33add70e440f885c210f3fa6ee Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 11 Aug 2010 15:13:17 +0100 Subject: gallivm: Use lp_build_div instead of lp_build_mul + lp_build_rcp. Single divide, so let lp_build_div decide how to implement this. This will save a multiplication in architectures which don't have a RCP intrinsic. 
--- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 665b010ece..307506507d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -1219,8 +1219,7 @@ lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord) /* ima = -0.5 / abs(coord); */ LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5); LLVMValueRef absCoord = lp_build_abs(coord_bld, coord); - LLVMValueRef ima = lp_build_mul(coord_bld, negHalf, - lp_build_rcp(coord_bld, absCoord)); + LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord); return ima; } -- cgit v1.2.3 From deb809ec98664257ed215e75554e1a80c149c851 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Fri, 6 Aug 2010 11:08:45 +0200 Subject: auxiliary: fix util_framebuffer_copy util_framebuffer_copy was attempting to copy all elements of the source framebuffer state. However, this breaks if the user does not zero initialize the structure. Instead, only copy the elements up to nr_cbufs, and clear elements up to dst->nr_cbufs, if the destination was larger than the source. 
--- src/gallium/auxiliary/util/u_framebuffer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_framebuffer.c b/src/gallium/auxiliary/util/u_framebuffer.c index 768ae9ceb5..7803ec6a8b 100644 --- a/src/gallium/auxiliary/util/u_framebuffer.c +++ b/src/gallium/auxiliary/util/u_framebuffer.c @@ -85,9 +85,11 @@ util_copy_framebuffer_state(struct pipe_framebuffer_state *dst, dst->width = src->width; dst->height = src->height; - for (i = 0; i < Elements(src->cbufs); i++) { + for (i = 0; i < src->nr_cbufs; i++) pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]); - } + + for (i = src->nr_cbufs; i < dst->nr_cbufs; i++) + pipe_surface_reference(&dst->cbufs[i], NULL); dst->nr_cbufs = src->nr_cbufs; -- cgit v1.2.3 From 16b45ca7cefb3432b4133fe9d0b1dbfe3f286131 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 11 Aug 2010 15:59:43 +0200 Subject: translate_generic: return NULL instead of assert(0) if format not supported This gives the caller a chance to recover (or crash anyway otherwise). 
--- src/gallium/auxiliary/translate/translate_generic.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 4d1977229e..809a4e47f4 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -201,12 +201,6 @@ emit_B8G8R8A8_UNORM( const float *attrib, void *ptr) out[3] = TO_8_UNORM(attrib[3]); } -static void -emit_NULL( const float *attrib, void *ptr ) -{ - /* do nothing is the only sensible option */ -} - static emit_func get_emit_func( enum pipe_format format ) { switch (format) { @@ -343,8 +337,7 @@ static emit_func get_emit_func( enum pipe_format format ) return &emit_A8R8G8B8_UNORM; default: - assert(0); - return &emit_NULL; + return NULL; } } @@ -539,8 +532,12 @@ struct translate *translate_generic_create( const struct translate_key *key ) tg->attrib[i].instance_divisor = key->element[i].instance_divisor; tg->attrib[i].emit = get_emit_func(key->element[i].output_format); + if(!tg->attrib[i].emit) + { + FREE(tg); + return NULL; + } tg->attrib[i].output_offset = key->element[i].output_offset; - } tg->nr_attrib = key->nr_elements; -- cgit v1.2.3 From 945e38c73b46afc12b0655f9fc8200f216a8f8c1 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 11 Aug 2010 15:57:37 +0200 Subject: translate: add testsuite Currently only checks all possible format conversions, and doesn't attempt to test whether multiple buffers/elements or indices work. 
--- src/gallium/tests/unit/Makefile | 3 +- src/gallium/tests/unit/translate_test.c | 170 ++++++++++++++++++++++++++++++++ 2 files changed, 172 insertions(+), 1 deletion(-) create mode 100644 src/gallium/tests/unit/translate_test.c (limited to 'src/gallium') diff --git a/src/gallium/tests/unit/Makefile b/src/gallium/tests/unit/Makefile index f65958dadd..345bd1f694 100644 --- a/src/gallium/tests/unit/Makefile +++ b/src/gallium/tests/unit/Makefile @@ -22,7 +22,8 @@ SOURCES = \ pipe_barrier_test.c \ u_cache_test.c \ u_half_test.c \ - u_format_test.c + u_format_test.c \ + translate_test.c OBJECTS = $(SOURCES:.c=.o) diff --git a/src/gallium/tests/unit/translate_test.c b/src/gallium/tests/unit/translate_test.c new file mode 100644 index 0000000000..473c73afaa --- /dev/null +++ b/src/gallium/tests/unit/translate_test.c @@ -0,0 +1,170 @@ +/************************************************************************** + * + * Copyright © 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <stdio.h> +#include <translate/translate.h> +#include <util/u_memory.h> +#include <util/u_format.h> + +int main(int argc, char** argv) +{ + struct translate *(*create_fn)( const struct translate_key *key ) = 0; + + struct translate_key key; + unsigned output_format; + unsigned input_format; + unsigned char* buffer[5]; + unsigned count = 4; + unsigned i, j, k; + unsigned passed = 0; + unsigned total = 0; + + create_fn = 0; + + if(argc > 1 && !strcmp(argv[1], "generic")) + create_fn = translate_generic_create; + else if(argc > 1 && !strcmp(argv[1], "x86")) + create_fn = translate_sse2_create; + + if(!create_fn) + { + printf("Usage: ./translate_test [generic|x86]\n"); + return 1; + } + + for(i = 0; i < Elements(buffer); ++i) + buffer[i] = align_malloc(4096, 4096); + + key.nr_elements = 1; + key.element[0].input_buffer = 0; + key.element[0].input_offset = 0; + key.element[0].output_offset = 0; + key.element[0].type = TRANSLATE_ELEMENT_NORMAL; + key.element[0].instance_divisor = 0; + + srand(4359025); + for(i = 0; i < 4096; ++i) + buffer[0][i] = rand() & 0x7f; /* avoid negative values that work badly when converted to unsigned format*/ + + for(output_format = 0; output_format < PIPE_FORMAT_COUNT; ++output_format) + { + const struct util_format_description* output_format_desc = util_format_description(output_format); + unsigned output_format_size = util_format_get_stride(output_format, 1); + if(!output_format_desc + || !output_format_desc->fetch_rgba_float + || !output_format_desc->pack_rgba_float + || output_format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB + || output_format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN + ) + continue; + + for(input_format = 0; input_format <
PIPE_FORMAT_COUNT; ++input_format) + { + const struct util_format_description* input_format_desc = util_format_description(input_format); + unsigned input_format_size = util_format_get_stride(input_format, 1); + struct translate* translate[2]; + unsigned fail = 0; + + if(!input_format_desc + || !input_format_desc->fetch_rgba_float + || !input_format_desc->pack_rgba_float + || input_format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB + || input_format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN + ) + continue; + + key.element[0].input_format = input_format; + key.element[0].output_format = output_format; + key.output_stride = output_format_size; + translate[0] = create_fn(&key); + if(!translate[0]) + continue; + + key.element[0].input_format = output_format; + key.element[0].output_format = input_format; + key.output_stride = input_format_size; + translate[1] = create_fn(&key); + if(!translate[1]) + goto free_translate0; + + translate[0]->set_buffer(translate[0], 0, buffer[0], input_format_size, ~0); + translate[0]->run(translate[0], 0, count, 0, buffer[1]); + translate[1]->set_buffer(translate[1], 0, buffer[1], output_format_size, ~0); + translate[1]->run(translate[1], 0, count, 0, buffer[2]); + translate[0]->set_buffer(translate[0], 0, buffer[2], input_format_size, ~0); + translate[0]->run(translate[0], 0, count, 0, buffer[3]); + translate[1]->set_buffer(translate[1], 0, buffer[3], output_format_size, ~0); + translate[1]->run(translate[1], 0, count, 0, buffer[4]); + + for(i = 0; i < count; ++i) + { + float a[4]; + float b[4]; + float error = 0.03125; + input_format_desc->fetch_rgba_float(a, buffer[2] + i * input_format_size, 0, 0); + input_format_desc->fetch_rgba_float(b, buffer[4] + i * input_format_size, 0, 0); + + for(j = 0; j < count; ++j) + { + float d = a[j] - b[j]; + if(d > error || d < -error) + { + fail = 1; + break; + } + } + } + + if(fail) + { + printf("FAIL: %s -> %s -> %s -> %s -> %s\n", input_format_desc->name, output_format_desc->name, 
input_format_desc->name, output_format_desc->name, input_format_desc->name); + for(i = 0; i < Elements(buffer); ++i) + { + unsigned format_size = (i & 1) ? output_format_size : input_format_size; + printf("%c ", (i == 2 || i == 4) ? '*' : ' '); + for(j = 0; j < count; ++j) + { + for(k = 0; k < format_size; ++k) + { + printf("%02x", buffer[i][j * format_size + k]); + } + printf(" "); + } + printf("\n"); + } + } + else + ++passed; + ++total; + + translate[1]->release(translate[1]); +free_translate0: + translate[0]->release(translate[0]); + } + } + + printf("%u/%u tests passed for translate_%s\n", passed, total, argv[1]); + return 0; +} -- cgit v1.2.3 From 684aeb366fb72a2dbf9dd7a0803f29880858cd06 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 11 Aug 2010 16:00:32 +0200 Subject: translate_generic: fix broken A8R8G8B8_UNORM output translate was attempting to output A8R8G8B8_UNORM as if it were R8G8B8A8_UNORM. Now the tests just added pass. --- src/gallium/auxiliary/translate/translate_generic.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 809a4e47f4..021099ae98 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -187,9 +187,15 @@ ATTRIB( R8G8B8_SNORM, 3, char, TO_8_SNORM ) ATTRIB( R8G8_SNORM, 2, char, TO_8_SNORM ) ATTRIB( R8_SNORM, 1, char, TO_8_SNORM ) -ATTRIB( A8R8G8B8_UNORM, 4, ubyte, TO_8_UNORM ) -/*ATTRIB( R8G8B8A8_UNORM, 4, ubyte, TO_8_UNORM )*/ - +static void +emit_A8R8G8B8_UNORM( const float *attrib, void *ptr) +{ + ubyte *out = (ubyte *)ptr; + out[0] = TO_8_UNORM(attrib[3]); + out[1] = TO_8_UNORM(attrib[0]); + out[2] = TO_8_UNORM(attrib[1]); + out[3] = TO_8_UNORM(attrib[2]); +} static void emit_B8G8R8A8_UNORM( const float *attrib, void *ptr) -- cgit v1.2.3 From 5266bc3c60033ccc3515b3a6d02f7219f981cbb1 Mon Sep 
17 00:00:00 2001 From: Luca Barbieri Date: Wed, 11 Aug 2010 16:59:25 +0200 Subject: Revert "translate_generic: return NULL instead of assert(0) if format not supported" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 16b45ca7cefb3432b4133fe9d0b1dbfe3f286131. José Fonseca asked for a revert. Note that the testsuite will now segfault since it attempts to test all possible formats. --- src/gallium/auxiliary/translate/translate_generic.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 021099ae98..0baa49a192 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -207,6 +207,12 @@ emit_B8G8R8A8_UNORM( const float *attrib, void *ptr) out[3] = TO_8_UNORM(attrib[3]); } +static void +emit_NULL( const float *attrib, void *ptr ) +{ + /* do nothing is the only sensible option */ +} + static emit_func get_emit_func( enum pipe_format format ) { switch (format) { @@ -343,7 +349,8 @@ static emit_func get_emit_func( enum pipe_format format ) return &emit_A8R8G8B8_UNORM; default: - return NULL; + assert(0); + return &emit_NULL; } } @@ -538,12 +545,8 @@ struct translate *translate_generic_create( const struct translate_key *key ) tg->attrib[i].instance_divisor = key->element[i].instance_divisor; tg->attrib[i].emit = get_emit_func(key->element[i].output_format); - if(!tg->attrib[i].emit) - { - FREE(tg); - return NULL; - } tg->attrib[i].output_offset = key->element[i].output_offset; + } tg->nr_attrib = key->nr_elements; -- cgit v1.2.3 From 457378e031ffb89a2011604c7798a6f5f2142207 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 11 Aug 2010 12:19:33 -0400 Subject: r600g: add point/sprite rendering support Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_shader.c | 60 
++++++++++++++++++++----- src/gallium/drivers/r600/r600_state.c | 23 ++++++++-- src/gallium/drivers/r600/r600d.h | 80 ++++++++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 14 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 8a778f5fd6..ca65bff24c 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -105,8 +105,8 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_screen *rscreen = r600_screen(ctx->screen); int r; -//fprintf(stderr, "--------------------------------------------------------------\n"); -//tgsi_dump(tokens, 0); +fprintf(stderr, "--------------------------------------------------------------\n"); +tgsi_dump(tokens, 0); if (rpshader == NULL) return -ENOMEM; rpshader->shader.family = radeon_get_family(rscreen->rw); @@ -120,7 +120,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx, R600_ERR("building bytecode failed !\n"); return r; } -//fprintf(stderr, "______________________________________________________________\n"); +fprintf(stderr, "______________________________________________________________\n"); return 0; } @@ -155,11 +155,14 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader) { + const struct pipe_rasterizer_state *rasterizer; struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_shader *rshader = &rpshader->shader; + struct r600_context *rctx = r600_context(ctx); struct radeon_state *state; unsigned i, tmp, exports_ps, num_cout; + rasterizer = &rctx->rasterizer->state.rasterizer; rpshader->rstate = radeon_state_decref(rpshader->rstate); state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER); if (state == NULL) @@ -171,6 +174,9 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta 
rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) { tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); } + if (rasterizer->sprite_coord_enable & (1 << i)) { + tmp |= S_028644_PT_SPRITE_TEX(1); + } state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp; } @@ -340,7 +346,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s struct tgsi_full_immediate *immediate; struct r600_shader_ctx ctx; struct r600_bc_output output[32]; - unsigned output_done; + unsigned output_done, noutput; unsigned opcode; int i, r = 0, pos0; @@ -418,7 +424,8 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s } } /* export output */ - for (i = 0, pos0 = 0; i < shader->noutput; i++) { + noutput = shader->noutput; + for (i = 0, pos0 = 0; i < noutput; i++) { memset(&output[i], 0, sizeof(struct r600_bc_output)); output[i].gpr = shader->output[i].gpr; output[i].elem_size = 3; @@ -430,13 +437,19 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; output[i].array_base = i - pos0; output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT; - switch (ctx.type == TGSI_PROCESSOR_VERTEX) { + switch (ctx.type) { case TGSI_PROCESSOR_VERTEX: if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { output[i].array_base = 60; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; /* position doesn't count in array_base */ - pos0 = 1; + pos0++; + } + if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { + output[i].array_base = 61; + output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + /* position doesn't count in array_base */ + pos0++; } break; case TGSI_PROCESSOR_FRAGMENT: @@ -457,17 +470,42 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s r = -EINVAL; goto out_err; } - if (i == (shader->noutput - 1)) { - output[i].end_of_program = 1; + } + /* add fake param output for vertex shader if no param is 
exported */ + if (ctx.type == TGSI_PROCESSOR_VERTEX) { + for (i = 0, pos0 = 0; i < noutput; i++) { + if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { + pos0 = 1; + break; + } + } + if (!pos0) { + memset(&output[i], 0, sizeof(struct r600_bc_output)); + output[i].gpr = 0; + output[i].elem_size = 3; + output[i].swizzle_x = 0; + output[i].swizzle_y = 1; + output[i].swizzle_z = 2; + output[i].swizzle_w = 3; + output[i].barrier = 1; + output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; + output[i].array_base = 0; + output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT; + noutput++; } } - for (i = shader->noutput - 1, output_done = 0; i >= 0; i--) { + /* set export done on last export of each type */ + for (i = noutput - 1, output_done = 0; i >= 0; i--) { + if (i == (noutput - 1)) { + output[i].end_of_program = 1; + } if (!(output_done & (1 << output[i].type))) { output_done |= (1 << output[i].type); output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE; } } - for (i = 0; i < shader->noutput; i++) { + /* add output to bytecode */ + for (i = 0; i < noutput; i++) { r = r600_bc_add_output(ctx.bc, &output[i]); if (r) goto out_err; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index cad5185e32..a50b75cc79 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -769,6 +769,7 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) float offset_units = 0, offset_scale = 0; char depth = 0; unsigned offset_db_fmt_cntl = 0; + unsigned tmp; if (fb->zsbuf) { offset_units = state->offset_units; @@ -800,6 +801,18 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) if (rstate == NULL) return NULL; rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000001; + if (state->sprite_coord_enable) { + rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] |= + S_0286D4_PNT_SPRITE_ENA(1) | + 
S_0286D4_PNT_SPRITE_OVRD_X(2) | + S_0286D4_PNT_SPRITE_OVRD_Y(3) | + S_0286D4_PNT_SPRITE_OVRD_Z(0) | + S_0286D4_PNT_SPRITE_OVRD_W(1); + if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { + rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] |= + S_0286D4_PNT_SPRITE_TOP_1(1); + } + } rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0x00000000; rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = 0x00080000 | S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | @@ -808,10 +821,14 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri); - rstate->states[R600_RASTERIZER__PA_CL_VS_OUT_CNTL] = 0x00000000; + rstate->states[R600_RASTERIZER__PA_CL_VS_OUT_CNTL] = + S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | + S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex); rstate->states[R600_RASTERIZER__PA_CL_NANINF_CNTL] = 0x00000000; - rstate->states[R600_RASTERIZER__PA_SU_POINT_SIZE] = 0x00080008; - rstate->states[R600_RASTERIZER__PA_SU_POINT_MINMAX] = 0x00000000; + /* point size 12.4 fixed point */ + tmp = (unsigned)(state->point_size * 8.0 / 2.0); + rstate->states[R600_RASTERIZER__PA_SU_POINT_SIZE] = S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp); + rstate->states[R600_RASTERIZER__PA_SU_POINT_MINMAX] = 0x80000000; rstate->states[R600_RASTERIZER__PA_SU_LINE_CNTL] = 0x00000008; rstate->states[R600_RASTERIZER__PA_SC_LINE_STIPPLE] = 0x00000005; rstate->states[R600_RASTERIZER__PA_SC_MPASS_PS_CNTL] = 0x00000000; diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index af93731550..53388f822e 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -654,6 +654,13 @@ #define S_028E0C_OFFSET(x) (((x) & 0xFFFFFFFF) << 0) #define G_028E0C_OFFSET(x) (((x) >> 0) & 0xFFFFFFFF) #define C_028E0C_OFFSET 
0x00000000 +#define R_028A00_PA_SU_POINT_SIZE 0x028A00 +#define S_028A00_HEIGHT(x) (((x) & 0xFFFF) << 0) +#define G_028A00_HEIGHT(x) (((x) >> 0) & 0xFFFF) +#define C_028A00_HEIGHT 0xFFFF0000 +#define S_028A00_WIDTH(x) (((x) & 0xFFFF) << 16) +#define G_028A00_WIDTH(x) (((x) >> 16) & 0xFFFF) +#define C_028A00_WIDTH 0x0000FFFF #define R_028A40_VGT_GS_MODE 0x028A40 #define S_028A40_MODE(x) (((x) & 0x3) << 0) #define G_028A40_MODE(x) (((x) >> 0) & 0x3) @@ -1153,6 +1160,79 @@ #define V_008958_DI_PT_2D_FILL_RECT_LIST 0x0000001A #define V_008958_DI_PT_2D_LINE_STRIP 0x0000001B #define V_008958_DI_PT_2D_TRI_STRIP 0x0000001C +#define R_02881C_PA_CL_VS_OUT_CNTL 0x02881C +#define S_02881C_CLIP_DIST_ENA_0(x) (((x) & 0x1) << 0) +#define G_02881C_CLIP_DIST_ENA_0(x) (((x) >> 0) & 0x1) +#define C_02881C_CLIP_DIST_ENA_0 0xFFFFFFFE +#define S_02881C_CLIP_DIST_ENA_1(x) (((x) & 0x1) << 1) +#define G_02881C_CLIP_DIST_ENA_1(x) (((x) >> 1) & 0x1) +#define C_02881C_CLIP_DIST_ENA_1 0xFFFFFFFD +#define S_02881C_CLIP_DIST_ENA_2(x) (((x) & 0x1) << 2) +#define G_02881C_CLIP_DIST_ENA_2(x) (((x) >> 2) & 0x1) +#define C_02881C_CLIP_DIST_ENA_2 0xFFFFFFFB +#define S_02881C_CLIP_DIST_ENA_3(x) (((x) & 0x1) << 3) +#define G_02881C_CLIP_DIST_ENA_3(x) (((x) >> 3) & 0x1) +#define C_02881C_CLIP_DIST_ENA_3 0xFFFFFFF7 +#define S_02881C_CLIP_DIST_ENA_4(x) (((x) & 0x1) << 4) +#define G_02881C_CLIP_DIST_ENA_4(x) (((x) >> 4) & 0x1) +#define C_02881C_CLIP_DIST_ENA_4 0xFFFFFFEF +#define S_02881C_CLIP_DIST_ENA_5(x) (((x) & 0x1) << 5) +#define G_02881C_CLIP_DIST_ENA_5(x) (((x) >> 5) & 0x1) +#define C_02881C_CLIP_DIST_ENA_5 0xFFFFFFDF +#define S_02881C_CLIP_DIST_ENA_6(x) (((x) & 0x1) << 6) +#define G_02881C_CLIP_DIST_ENA_6(x) (((x) >> 6) & 0x1) +#define C_02881C_CLIP_DIST_ENA_6 0xFFFFFFBF +#define S_02881C_CLIP_DIST_ENA_7(x) (((x) & 0x1) << 7) +#define G_02881C_CLIP_DIST_ENA_7(x) (((x) >> 7) & 0x1) +#define C_02881C_CLIP_DIST_ENA_7 0xFFFFFF7F +#define S_02881C_CULL_DIST_ENA_0(x) (((x) & 0x1) << 8) +#define 
G_02881C_CULL_DIST_ENA_0(x) (((x) >> 8) & 0x1) +#define C_02881C_CULL_DIST_ENA_0 0xFFFFFEFF +#define S_02881C_CULL_DIST_ENA_1(x) (((x) & 0x1) << 9) +#define G_02881C_CULL_DIST_ENA_1(x) (((x) >> 9) & 0x1) +#define C_02881C_CULL_DIST_ENA_1 0xFFFFFDFF +#define S_02881C_CULL_DIST_ENA_2(x) (((x) & 0x1) << 10) +#define G_02881C_CULL_DIST_ENA_2(x) (((x) >> 10) & 0x1) +#define C_02881C_CULL_DIST_ENA_2 0xFFFFFBFF +#define S_02881C_CULL_DIST_ENA_3(x) (((x) & 0x1) << 11) +#define G_02881C_CULL_DIST_ENA_3(x) (((x) >> 11) & 0x1) +#define C_02881C_CULL_DIST_ENA_3 0xFFFFF7FF +#define S_02881C_CULL_DIST_ENA_4(x) (((x) & 0x1) << 12) +#define G_02881C_CULL_DIST_ENA_4(x) (((x) >> 12) & 0x1) +#define C_02881C_CULL_DIST_ENA_4 0xFFFFEFFF +#define S_02881C_CULL_DIST_ENA_5(x) (((x) & 0x1) << 13) +#define G_02881C_CULL_DIST_ENA_5(x) (((x) >> 13) & 0x1) +#define C_02881C_CULL_DIST_ENA_5 0xFFFFDFFF +#define S_02881C_CULL_DIST_ENA_6(x) (((x) & 0x1) << 14) +#define G_02881C_CULL_DIST_ENA_6(x) (((x) >> 14) & 0x1) +#define C_02881C_CULL_DIST_ENA_6 0xFFFFBFFF +#define S_02881C_CULL_DIST_ENA_7(x) (((x) & 0x1) << 15) +#define G_02881C_CULL_DIST_ENA_7(x) (((x) >> 15) & 0x1) +#define C_02881C_CULL_DIST_ENA_7 0xFFFF7FFF +#define S_02881C_USE_VTX_POINT_SIZE(x) (((x) & 0x1) << 16) +#define G_02881C_USE_VTX_POINT_SIZE(x) (((x) >> 16) & 0x1) +#define C_02881C_USE_VTX_POINT_SIZE 0xFFFEFFFF +#define S_02881C_USE_VTX_EDGE_FLAG(x) (((x) & 0x1) << 17) +#define G_02881C_USE_VTX_EDGE_FLAG(x) (((x) >> 17) & 0x1) +#define C_02881C_USE_VTX_EDGE_FLAG 0xFFFDFFFF +#define S_02881C_USE_VTX_RENDER_TARGET_INDX(x) (((x) & 0x1) << 18) +#define G_02881C_USE_VTX_RENDER_TARGET_INDX(x) (((x) >> 18) & 0x1) +#define C_02881C_USE_VTX_RENDER_TARGET_INDX 0xFFFBFFFF +#define S_02881C_USE_VTX_VIEWPORT_INDX(x) (((x) & 0x1) << 19) +#define G_02881C_USE_VTX_VIEWPORT_INDX(x) (((x) >> 19) & 0x1) +#define C_02881C_USE_VTX_VIEWPORT_INDX 0xFFF7FFFF +#define S_02881C_USE_VTX_KILL_FLAG(x) (((x) & 0x1) << 20) +#define 
G_02881C_USE_VTX_KILL_FLAG(x) (((x) >> 20) & 0x1) +#define C_02881C_USE_VTX_KILL_FLAG 0xFFEFFFFF +#define S_02881C_VS_OUT_MISC_VEC_ENA(x) (((x) & 0x1) << 21) +#define G_02881C_VS_OUT_MISC_VEC_ENA(x) (((x) >> 21) & 0x1) +#define C_02881C_VS_OUT_MISC_VEC_ENA 0xFFDFFFFF +#define S_02881C_VS_OUT_CCDIST0_VEC_ENA(x) (((x) & 0x1) << 22) +#define G_02881C_VS_OUT_CCDIST0_VEC_ENA(x) (((x) >> 22) & 0x1) +#define C_02881C_VS_OUT_CCDIST0_VEC_ENA 0xFFBFFFFF +#define S_02881C_VS_OUT_CCDIST1_VEC_ENA(x) (((x) & 0x1) << 23) +#define G_02881C_VS_OUT_CCDIST1_VEC_ENA(x) (((x) >> 23) & 0x1) +#define C_02881C_VS_OUT_CCDIST1_VEC_ENA 0xFF7FFFFF #define R_028868_SQ_PGM_RESOURCES_VS 0x028868 #define S_028868_NUM_GPRS(x) (((x) & 0xFF) << 0) #define G_028868_NUM_GPRS(x) (((x) >> 0) & 0xFF) -- cgit v1.2.3 From b481a1237e00e1e1fb68ffca0653df3a96f21788 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 11 Aug 2010 18:44:17 +0100 Subject: gallivm: Fix and enable the extra Newton/Raphson step in lp_build_rcp(). Thanks to Michal for spotting this. --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index cf2feeb163..816ee70119 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1274,7 +1274,7 @@ lp_build_rcp(struct lp_build_context *bld, * when we have a better system in place to track minimum precision. 
*/ -#if 0 +#if 1 /* * Do one Newton-Raphson step to improve precision: * @@ -1291,7 +1291,7 @@ lp_build_rcp(struct lp_build_context *bld, res = LLVMBuildFSub(bld->builder, two, res, ""); res = LLVMBuildFMul(bld->builder, res, rcp_a, ""); - return rcp_a; + return res; #else return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a); #endif -- cgit v1.2.3 From 1bb0427a856ffa3fea1b177ea5b0395a00de3833 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 11 Aug 2010 13:49:06 -0400 Subject: r600g: add src negation support Should fix few glBitmap cases. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_shader.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index ca65bff24c..cbeb69221c 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -537,6 +537,7 @@ static int tgsi_src(struct r600_shader_ctx *ctx, if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { r600_src->sel = 0; } + r600_src->neg = tgsi_src->Register.Negate; r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; return 0; } -- cgit v1.2.3 From 481b65abaedb271d0da24c75b8c60f7bcf6d8ce9 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 11 Aug 2010 14:26:07 -0400 Subject: r600g: accept empty frag prog shader Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_asm.c | 3 +-- src/gallium/drivers/r600/r600_shader.c | 15 +++++++++++++++ src/gallium/drivers/r600/r600_state.c | 2 +- 3 files changed, 17 insertions(+), 3 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index ae818bf19b..9ea9d4354d 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -167,8 +167,7 @@ int r600_bc_add_literal(struct r600_bc *bc, const u32 *value) struct r600_bc_alu *alu; if (bc->cf_last == NULL) 
{ - R600_ERR("no last CF\n"); - return -EINVAL; + return 0; } if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) { return 0; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index cbeb69221c..956c7e7930 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -494,6 +494,21 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s noutput++; } } + /* add fake pixel export */ + if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { + memset(&output[0], 0, sizeof(struct r600_bc_output)); + output[0].gpr = 0; + output[0].elem_size = 3; + output[0].swizzle_x = 7; + output[0].swizzle_y = 7; + output[0].swizzle_z = 7; + output[0].swizzle_w = 7; + output[0].barrier = 1; + output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; + output[0].array_base = 0; + output[0].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT; + noutput++; + } /* set export done on last export of each type */ for (i = noutput - 1, output_done = 0; i >= 0; i--) { if (i == (noutput - 1)) { diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index a50b75cc79..ed2d9f9984 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -727,7 +727,7 @@ static struct radeon_state *r600_db(struct r600_context *rctx) struct r600_resource *rbuffer; struct radeon_state *rstate; const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer; - unsigned level = state->cbufs[0]->level; + unsigned level; unsigned pitch, slice, format; if (state->zsbuf == NULL) -- cgit v1.2.3 From f78445de5d2316934ebeaa19a616d2f960c89237 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 11 Aug 2010 09:25:45 -0700 Subject: r300/compiler: Implement the CONT opcode. 
--- src/gallium/drivers/r300/r300_tgsi_to_rc.c | 2 +- .../drivers/dri/r300/compiler/r500_fragprog_emit.c | 19 ++++++++++++++++--- .../dri/r300/compiler/radeon_dataflow_deadcode.c | 2 +- src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c | 4 ++-- src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h | 2 +- 5 files changed, 21 insertions(+), 8 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 51b2c55550..dd697b9c37 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -126,7 +126,7 @@ static unsigned translate_opcode(unsigned opcode) /* case TGSI_OPCODE_SAD: return RC_OPCODE_SAD; */ /* case TGSI_OPCODE_TXF: return RC_OPCODE_TXF; */ /* case TGSI_OPCODE_TXQ: return RC_OPCODE_TXQ; */ - /* case TGSI_OPCODE_CONT: return RC_OPCODE_CONT; */ + case TGSI_OPCODE_CONT: return RC_OPCODE_CONT; /* case TGSI_OPCODE_EMIT: return RC_OPCODE_EMIT; */ /* case TGSI_OPCODE_ENDPRIM: return RC_OPCODE_ENDPRIM; */ /* case TGSI_OPCODE_BGNLOOP2: return RC_OPCODE_BGNLOOP2; */ diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index c3f817ad4e..dfad12eb15 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -70,6 +70,10 @@ struct loop_info { int * Brks; int BrkCount; int BrkReserved; + + int * Conts; + int ContCount; + int ContReserved; }; struct emit_state { @@ -413,15 +417,18 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst ; break; - case RC_OPCODE_CONTINUE: + case RC_OPCODE_CONT: loop = &s->Loops[s->CurrentLoopDepth - 1]; - s->Code->inst[newip].inst2 = R500_FC_OP_JUMP + memory_pool_array_reserve(&s->C->Pool, int, loop->Conts, + loop->ContCount, loop->ContReserved, 1); + loop->Conts[loop->ContCount++] = newip; + s->Code->inst[newip].inst2 = 
R500_FC_OP_CONTINUE | R500_FC_JUMP_FUNC(0xff) | R500_FC_B_OP1_DECR | R500_FC_B_POP_CNT( s->CurrentBranchDepth - loop->BranchDepth) + | R500_FC_IGNORE_UNCOVERED ; - s->Code->inst[newip].inst3 = R500_FC_JUMP_ADDR(loop->BgnLoop); break; case RC_OPCODE_ENDLOOP: @@ -449,6 +456,12 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst s->Code->inst[loop->Brks[loop->BrkCount]].inst3 = R500_FC_JUMP_ADDR(newip + 1); } + + /* Set jump address for CONT instructions. */ + while(loop->ContCount--) { + s->Code->inst[loop->Conts[loop->ContCount]].inst3 = + R500_FC_JUMP_ADDR(newip); + } s->CurrentLoopDepth--; break; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c index 31566a937f..faf531b412 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c @@ -274,7 +274,7 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f } break; } - case RC_OPCODE_CONTINUE: + case RC_OPCODE_CONT: break; case RC_OPCODE_ENDIF: push_branch(&s); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c index 04f234f11d..2ea830be7f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -386,8 +386,8 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .NumSrcRegs = 0, }, { - .Opcode = RC_OPCODE_CONTINUE, - .Name = "CONTINUE", + .Opcode = RC_OPCODE_CONT, + .Name = "CONT", .IsFlowControl = 1, .NumSrcRegs = 0 }, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h index 8b9fa07dde..6e18d6eb3f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h @@ -187,7 +187,7 @@ typedef enum { RC_OPCODE_ENDLOOP, - 
RC_OPCODE_CONTINUE, + RC_OPCODE_CONT, /** special instruction, used in R300-R500 fragment program pair instructions * indicates that the result of the alpha operation shall be replicated -- cgit v1.2.3 From 04cfc6234c9bf8c82343e39e0aa9def157e4091d Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 11 Aug 2010 12:04:54 -0700 Subject: auxiliary: Add u_staging.c to SCons build. This is a follow-up to commit b85c71d4e1e4ed788be834dff5b7b3c0cd0402ac which added u_staging.c to make. --- src/gallium/auxiliary/SConscript | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 1f09198721..3124e20ce8 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -180,6 +180,7 @@ source = [ 'util/u_sampler.c', 'util/u_simple_shaders.c', 'util/u_snprintf.c', + 'util/u_staging.c', 'util/u_surface.c', 'util/u_surfaces.c', 'util/u_texture.c', -- cgit v1.2.3 From f7d7b080a17abc1bb25b0d076c8ba275c3fc1cec Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 11 Aug 2010 12:18:25 -0700 Subject: translate: Add translate_test.c to SCons. This is a follow-up to commit 945e38c73b46afc12b0655f9fc8200f216a8f8c1, which added translate_test.c to make. --- src/gallium/tests/unit/SConscript | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/tests/unit/SConscript b/src/gallium/tests/unit/SConscript index a200123f44..a0ad58ffa2 100644 --- a/src/gallium/tests/unit/SConscript +++ b/src/gallium/tests/unit/SConscript @@ -11,7 +11,8 @@ progs = [ 'pipe_barrier_test', 'u_cache_test', 'u_format_test', - 'u_half_test' + 'u_half_test', + 'translate_test' ] for prog in progs: -- cgit v1.2.3 From 27fe2347bc2ddf88cb666a95adfb0b12a39d42b9 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 11 Aug 2010 12:45:02 -0700 Subject: auxiliary: Make u_staging.c MSVC compatible. Fixes MSVC build. 
--- src/gallium/auxiliary/util/u_staging.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_staging.c b/src/gallium/auxiliary/util/u_staging.c index 4853aaf6d9..4dff8a0b5a 100644 --- a/src/gallium/auxiliary/util/u_staging.c +++ b/src/gallium/auxiliary/util/u_staging.c @@ -22,7 +22,7 @@ util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigne template->flags = 0; } -inline struct util_staging_transfer * +INLINE struct util_staging_transfer * util_staging_transfer_new(struct pipe_context *pipe, struct pipe_resource *pt, struct pipe_subresource sr, @@ -61,9 +61,10 @@ util_staging_transfer_new(struct pipe_context *pipe, if (usage & PIPE_TRANSFER_READ) { struct pipe_subresource dstsr; + unsigned zi; dstsr.face = 0; dstsr.level = 0; - for(unsigned zi = 0; zi < box->depth; ++zi) + for(zi = 0; zi < box->depth; ++zi) pipe->resource_copy_region(pipe, tx->staging_resource, dstsr, 0, 0, 0, tx->base.resource, sr, box->x, box->y, box->z + zi, box->width, box->height); } @@ -79,9 +80,10 @@ util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *p { if(tx->base.usage & PIPE_TRANSFER_WRITE) { struct pipe_subresource srcsr; + unsigned zi; srcsr.face = 0; srcsr.level = 0; - for(unsigned zi = 0; zi < tx->base.box.depth; ++zi) + for(zi = 0; zi < tx->base.box.depth; ++zi) pipe->resource_copy_region(pipe, tx->base.resource, tx->base.sr, tx->base.box.x, tx->base.box.y, tx->base.box.z + zi, tx->staging_resource, srcsr, 0, 0, 0, tx->base.box.width, tx->base.box.height); } -- cgit v1.2.3 From 10adb7840c3a37dedc940fe593b246336eebd71e Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 11 Aug 2010 21:19:12 +0200 Subject: translate: allow clients to ask for supported output formats Currently translate asserts on unsupported output formats, making it impossible to use for some purposes, such as testing whether it actually works on all formats it supports. 
Removing the assert was met with opposition, so this change allows clients to ask whether an output format is supported, and they are thus able to avoid attempting to use it. Since this is just an addition to the API, no adverse effect is possible, and it makes the testsuite work again. --- src/gallium/auxiliary/translate/translate.c | 5 ++ src/gallium/auxiliary/translate/translate.h | 3 + .../auxiliary/translate/translate_generic.c | 80 ++++++++++++++++++++++ 3 files changed, 88 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/translate/translate.c b/src/gallium/auxiliary/translate/translate.c index a9b7253bf4..fe638e211f 100644 --- a/src/gallium/auxiliary/translate/translate.c +++ b/src/gallium/auxiliary/translate/translate.c @@ -48,3 +48,8 @@ struct translate *translate_create( const struct translate_key *key ) return translate_generic_create( key ); } + +boolean translate_is_output_format_supported(enum pipe_format format) +{ + return translate_generic_is_output_format_supported(format); +} diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index edd95e0788..eb6f2cc486 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -105,6 +105,8 @@ struct translate *translate_lookup_or_create( struct translate_context *tctx, struct translate *translate_create( const struct translate_key *key ); +boolean translate_is_output_format_supported(enum pipe_format format); + static INLINE int translate_keysize( const struct translate_key *key ) { return 2 * sizeof(int) + key->nr_elements * sizeof(struct translate_element); @@ -138,5 +140,6 @@ struct translate *translate_sse2_create( const struct translate_key *key ); struct translate *translate_generic_create( const struct translate_key *key ); +boolean translate_generic_is_output_format_supported(enum pipe_format format); #endif diff --git a/src/gallium/auxiliary/translate/translate_generic.c 
b/src/gallium/auxiliary/translate/translate_generic.c index 0baa49a192..42cfd763e9 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -554,3 +554,83 @@ struct translate *translate_generic_create( const struct translate_key *key ) return &tg->translate; } + +boolean translate_generic_is_output_format_supported(enum pipe_format format) +{ + switch(format) + { + case PIPE_FORMAT_R64G64B64A64_FLOAT: return TRUE; + case PIPE_FORMAT_R64G64B64_FLOAT: return TRUE; + case PIPE_FORMAT_R64G64_FLOAT: return TRUE; + case PIPE_FORMAT_R64_FLOAT: return TRUE; + + case PIPE_FORMAT_R32G32B32A32_FLOAT: return TRUE; + case PIPE_FORMAT_R32G32B32_FLOAT: return TRUE; + case PIPE_FORMAT_R32G32_FLOAT: return TRUE; + case PIPE_FORMAT_R32_FLOAT: return TRUE; + + case PIPE_FORMAT_R32G32B32A32_USCALED: return TRUE; + case PIPE_FORMAT_R32G32B32_USCALED: return TRUE; + case PIPE_FORMAT_R32G32_USCALED: return TRUE; + case PIPE_FORMAT_R32_USCALED: return TRUE; + + case PIPE_FORMAT_R32G32B32A32_SSCALED: return TRUE; + case PIPE_FORMAT_R32G32B32_SSCALED: return TRUE; + case PIPE_FORMAT_R32G32_SSCALED: return TRUE; + case PIPE_FORMAT_R32_SSCALED: return TRUE; + + case PIPE_FORMAT_R32G32B32A32_UNORM: return TRUE; + case PIPE_FORMAT_R32G32B32_UNORM: return TRUE; + case PIPE_FORMAT_R32G32_UNORM: return TRUE; + case PIPE_FORMAT_R32_UNORM: return TRUE; + + case PIPE_FORMAT_R32G32B32A32_SNORM: return TRUE; + case PIPE_FORMAT_R32G32B32_SNORM: return TRUE; + case PIPE_FORMAT_R32G32_SNORM: return TRUE; + case PIPE_FORMAT_R32_SNORM: return TRUE; + + case PIPE_FORMAT_R16G16B16A16_USCALED: return TRUE; + case PIPE_FORMAT_R16G16B16_USCALED: return TRUE; + case PIPE_FORMAT_R16G16_USCALED: return TRUE; + case PIPE_FORMAT_R16_USCALED: return TRUE; + + case PIPE_FORMAT_R16G16B16A16_SSCALED: return TRUE; + case PIPE_FORMAT_R16G16B16_SSCALED: return TRUE; + case PIPE_FORMAT_R16G16_SSCALED: return TRUE; + case PIPE_FORMAT_R16_SSCALED: return 
TRUE; + + case PIPE_FORMAT_R16G16B16A16_UNORM: return TRUE; + case PIPE_FORMAT_R16G16B16_UNORM: return TRUE; + case PIPE_FORMAT_R16G16_UNORM: return TRUE; + case PIPE_FORMAT_R16_UNORM: return TRUE; + + case PIPE_FORMAT_R16G16B16A16_SNORM: return TRUE; + case PIPE_FORMAT_R16G16B16_SNORM: return TRUE; + case PIPE_FORMAT_R16G16_SNORM: return TRUE; + case PIPE_FORMAT_R16_SNORM: return TRUE; + + case PIPE_FORMAT_R8G8B8A8_USCALED: return TRUE; + case PIPE_FORMAT_R8G8B8_USCALED: return TRUE; + case PIPE_FORMAT_R8G8_USCALED: return TRUE; + case PIPE_FORMAT_R8_USCALED: return TRUE; + + case PIPE_FORMAT_R8G8B8A8_SSCALED: return TRUE; + case PIPE_FORMAT_R8G8B8_SSCALED: return TRUE; + case PIPE_FORMAT_R8G8_SSCALED: return TRUE; + case PIPE_FORMAT_R8_SSCALED: return TRUE; + + case PIPE_FORMAT_R8G8B8A8_UNORM: return TRUE; + case PIPE_FORMAT_R8G8B8_UNORM: return TRUE; + case PIPE_FORMAT_R8G8_UNORM: return TRUE; + case PIPE_FORMAT_R8_UNORM: return TRUE; + + case PIPE_FORMAT_R8G8B8A8_SNORM: return TRUE; + case PIPE_FORMAT_R8G8B8_SNORM: return TRUE; + case PIPE_FORMAT_R8G8_SNORM: return TRUE; + case PIPE_FORMAT_R8_SNORM: return TRUE; + + case PIPE_FORMAT_A8R8G8B8_UNORM: return TRUE; + case PIPE_FORMAT_B8G8R8A8_UNORM: return TRUE; + default: return FALSE; + } +} -- cgit v1.2.3 From 99cc6d70a4dc87c67f3eec9b118a853947718373 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 11 Aug 2010 21:28:33 +0200 Subject: translate_test: fix, reindent, and improve 1. Fix the segfault due to the reverted commit using the new interface 2. Reindent to Mesa 3 spaces style 3. Improve output and return success/failure with error code 4. 
Add much better support for testing translate_sse --- src/gallium/tests/unit/translate_test.c | 341 +++++++++++++++++++------------- 1 file changed, 204 insertions(+), 137 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/tests/unit/translate_test.c b/src/gallium/tests/unit/translate_test.c index 473c73afaa..a934a6abf4 100644 --- a/src/gallium/tests/unit/translate_test.c +++ b/src/gallium/tests/unit/translate_test.c @@ -27,144 +27,211 @@ #include #include #include +#include +#include int main(int argc, char** argv) { - struct translate *(*create_fn)( const struct translate_key *key ) = 0; - - struct translate_key key; - unsigned output_format; - unsigned input_format; - unsigned char* buffer[5]; - unsigned count = 4; - unsigned i, j, k; - unsigned passed = 0; - unsigned total = 0; - - create_fn = 0; - - if(argc > 1 && !strcmp(argv[1], "generic")) - create_fn = translate_generic_create; - else if(argc > 1 && !strcmp(argv[1], "x86")) - create_fn = translate_sse2_create; - - if(!create_fn) - { - printf("Usage: ./translate_test [generic|x86]\n"); - return 1; - } - - for(i = 0; i < Elements(buffer); ++i) - buffer[i] = align_malloc(4096, 4096); - - key.nr_elements = 1; - key.element[0].input_buffer = 0; - key.element[0].input_offset = 0; - key.element[0].output_offset = 0; - key.element[0].type = TRANSLATE_ELEMENT_NORMAL; - key.element[0].instance_divisor = 0; - - srand(4359025); - for(i = 0; i < 4096; ++i) - buffer[0][i] = rand() & 0x7f; /* avoid negative values that work badly when converted to unsigned format*/ - - for(output_format = 0; output_format < PIPE_FORMAT_COUNT; ++output_format) - { - const struct util_format_description* output_format_desc = util_format_description(output_format); - unsigned output_format_size = util_format_get_stride(output_format, 1); - if(!output_format_desc - || !output_format_desc->fetch_rgba_float - || !output_format_desc->pack_rgba_float - || output_format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB - || 
output_format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN - ) - continue; - - for(input_format = 0; input_format < PIPE_FORMAT_COUNT; ++input_format) - { - const struct util_format_description* input_format_desc = util_format_description(input_format); - unsigned input_format_size = util_format_get_stride(input_format, 1); - struct translate* translate[2]; - unsigned fail = 0; - - if(!input_format_desc - || !input_format_desc->fetch_rgba_float - || !input_format_desc->pack_rgba_float - || input_format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB - || input_format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN - ) - continue; - - key.element[0].input_format = input_format; - key.element[0].output_format = output_format; - key.output_stride = output_format_size; - translate[0] = create_fn(&key); - if(!translate[0]) - continue; - - key.element[0].input_format = output_format; - key.element[0].output_format = input_format; - key.output_stride = input_format_size; - translate[1] = create_fn(&key); - if(!translate[1]) - goto free_translate0; - - translate[0]->set_buffer(translate[0], 0, buffer[0], input_format_size, ~0); - translate[0]->run(translate[0], 0, count, 0, buffer[1]); - translate[1]->set_buffer(translate[1], 0, buffer[1], output_format_size, ~0); - translate[1]->run(translate[1], 0, count, 0, buffer[2]); - translate[0]->set_buffer(translate[0], 0, buffer[2], input_format_size, ~0); - translate[0]->run(translate[0], 0, count, 0, buffer[3]); - translate[1]->set_buffer(translate[1], 0, buffer[3], output_format_size, ~0); - translate[1]->run(translate[1], 0, count, 0, buffer[4]); - - for(i = 0; i < count; ++i) - { - float a[4]; - float b[4]; - float error = 0.03125; - input_format_desc->fetch_rgba_float(a, buffer[2] + i * input_format_size, 0, 0); - input_format_desc->fetch_rgba_float(b, buffer[4] + i * input_format_size, 0, 0); - - for(j = 0; j < count; ++j) - { - float d = a[j] - b[j]; - if(d > error || d < -error) - { - fail = 1; - break; - } - } - } - - if(fail) - { 
- printf("FAIL: %s -> %s -> %s -> %s -> %s\n", input_format_desc->name, output_format_desc->name, input_format_desc->name, output_format_desc->name, input_format_desc->name); - for(i = 0; i < Elements(buffer); ++i) - { - unsigned format_size = (i & 1) ? output_format_size : input_format_size; - printf("%c ", (i == 2 || i == 4) ? '*' : ' '); - for(j = 0; j < count; ++j) - { - for(k = 0; k < format_size; ++k) - { - printf("%02x", buffer[i][j * format_size + k]); - } - printf(" "); - } - printf("\n"); - } - } - else - ++passed; - ++total; - - translate[1]->release(translate[1]); -free_translate0: - translate[0]->release(translate[0]); - } - } - - printf("%u/%u tests passed for translate_%s\n", passed, total, argv[1]); - return 0; + struct translate *(*create_fn)(const struct translate_key *key) = 0; + + struct translate_key key; + unsigned output_format; + unsigned input_format; + unsigned char* buffer[5]; + unsigned count = 4; + unsigned i, j, k; + unsigned passed = 0; + unsigned total = 0; + const float error = 0.03125; + + create_fn = 0; + + util_cpu_detect(); + + if(argc <= 1) + {} + else if (!strcmp(argv[1], "generic")) + create_fn = translate_generic_create; + else if (!strcmp(argv[1], "x86")) + create_fn = translate_sse2_create; + else if (!strcmp(argv[1], "nosse")) + { + util_cpu_caps.has_sse = 0; + util_cpu_caps.has_sse2 = 0; + util_cpu_caps.has_sse3 = 0; + util_cpu_caps.has_sse4_1 = 0; + create_fn = translate_sse2_create; + } + else if (!strcmp(argv[1], "sse")) + { + if(!util_cpu_caps.has_sse || !rtasm_cpu_has_sse()) + { + printf("Error: CPU doesn't support SSE (test with qemu)\n"); + return 2; + } + util_cpu_caps.has_sse2 = 0; + util_cpu_caps.has_sse3 = 0; + util_cpu_caps.has_sse4_1 = 0; + create_fn = translate_sse2_create; + } + else if (!strcmp(argv[1], "sse2")) + { + if(!util_cpu_caps.has_sse2 || !rtasm_cpu_has_sse()) + { + printf("Error: CPU doesn't support SSE2 (test with qemu)\n"); + return 2; + } + util_cpu_caps.has_sse3 = 0; + 
util_cpu_caps.has_sse4_1 = 0; + create_fn = translate_sse2_create; + } + else if (!strcmp(argv[1], "sse3")) + { + if(!util_cpu_caps.has_sse3 || !rtasm_cpu_has_sse()) + { + printf("Error: CPU doesn't support SSE3 (test with qemu)\n"); + return 2; + } + util_cpu_caps.has_sse4_1 = 0; + create_fn = translate_sse2_create; + } + else if (!strcmp(argv[1], "sse4.1")) + { + if(!util_cpu_caps.has_sse4_1 || !rtasm_cpu_has_sse()) + { + printf("Error: CPU doesn't support SSE4.1 (test with qemu)\n"); + return 2; + } + create_fn = translate_sse2_create; + } + + if (!create_fn) + { + printf("Usage: ./translate_test [generic|x86|nosse|sse|sse2|sse3|sse4.1]\n"); + return 2; + } + + for (i = 0; i < Elements(buffer); ++i) + buffer[i] = align_malloc(4096, 4096); + + key.nr_elements = 1; + key.element[0].input_buffer = 0; + key.element[0].input_offset = 0; + key.element[0].output_offset = 0; + key.element[0].type = TRANSLATE_ELEMENT_NORMAL; + key.element[0].instance_divisor = 0; + + srand(4359025); + for (i = 0; i < 4096; ++i) + buffer[0][i] = rand() & 0x7f; /* avoid negative values that work badly when converted to unsigned format*/ + + for (output_format = 1; output_format < PIPE_FORMAT_COUNT; ++output_format) + { + const struct util_format_description* output_format_desc = util_format_description(output_format); + unsigned output_format_size = util_format_get_stride(output_format, 1); + if (!output_format_desc + || !output_format_desc->fetch_rgba_float + || !output_format_desc->pack_rgba_float + || output_format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB + || output_format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN + || !translate_is_output_format_supported(output_format)) + continue; + + for (input_format = 1; input_format < PIPE_FORMAT_COUNT; ++input_format) + { + const struct util_format_description* input_format_desc = util_format_description(input_format); + unsigned input_format_size = util_format_get_stride(input_format, 1); + struct translate* translate[2]; + unsigned fail 
= 0; + unsigned used_generic = 0; + + if (!input_format_desc + || !input_format_desc->fetch_rgba_float + || !input_format_desc->pack_rgba_float + || input_format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB + || input_format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN + || !translate_is_output_format_supported(input_format)) + continue; + + key.element[0].input_format = input_format; + key.element[0].output_format = output_format; + key.output_stride = output_format_size; + translate[0] = create_fn(&key); + if (!translate[0]) + continue; + + key.element[0].input_format = output_format; + key.element[0].output_format = input_format; + key.output_stride = input_format_size; + translate[1] = create_fn(&key); + if(!translate[1]) + { + used_generic = 1; + translate[1] = translate_generic_create(&key); + if(!translate[1]) + continue; + } + + translate[0]->set_buffer(translate[0], 0, buffer[0], input_format_size, ~0); + translate[0]->run(translate[0], 0, count, 0, buffer[1]); + translate[1]->set_buffer(translate[1], 0, buffer[1], output_format_size, ~0); + translate[1]->run(translate[1], 0, count, 0, buffer[2]); + translate[0]->set_buffer(translate[0], 0, buffer[2], input_format_size, ~0); + translate[0]->run(translate[0], 0, count, 0, buffer[3]); + translate[1]->set_buffer(translate[1], 0, buffer[3], output_format_size, ~0); + translate[1]->run(translate[1], 0, count, 0, buffer[4]); + + for (i = 0; i < count; ++i) + { + float a[4]; + float b[4]; + input_format_desc->fetch_rgba_float(a, buffer[2] + i * input_format_size, 0, 0); + input_format_desc->fetch_rgba_float(b, buffer[4] + i * input_format_size, 0, 0); + + for (j = 0; j < count; ++j) + { + float d = a[j] - b[j]; + if (d > error || d < -error) + { + fail = 1; + break; + } + } + } + + printf("%s%s: %s -> %s -> %s -> %s -> %s\n", + fail ? "FAIL" : "PASS", + used_generic ? 
"[GENERIC]" : "", + input_format_desc->name, output_format_desc->name, input_format_desc->name, output_format_desc->name, input_format_desc->name); + + if (fail) + { + for (i = 0; i < Elements(buffer); ++i) + { + unsigned format_size = (i & 1) ? output_format_size : input_format_size; + printf("%c ", (i == 2 || i == 4) ? '*' : ' '); + for (j = 0; j < count; ++j) + { + for (k = 0; k < format_size; ++k) + { + printf("%02x", buffer[i][j * format_size + k]); + } + printf(" "); + } + printf("\n"); + } + } + + if (!fail) + ++passed; + ++total; + + if(translate[1]) + translate[1]->release(translate[1]); + translate[0]->release(translate[0]); + } + } + + printf("%u/%u tests passed for translate_%s\n", passed, total, argv[1]); + return passed != total; } -- cgit v1.2.3 From 41c7ff11e6aa4d82d1175446aea0984bf28e3905 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 11 Aug 2010 22:11:19 +0200 Subject: u_staging: remove useless inline keyword --- src/gallium/auxiliary/util/u_staging.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_staging.c b/src/gallium/auxiliary/util/u_staging.c index 4dff8a0b5a..607c31f5ee 100644 --- a/src/gallium/auxiliary/util/u_staging.c +++ b/src/gallium/auxiliary/util/u_staging.c @@ -22,7 +22,7 @@ util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigne template->flags = 0; } -INLINE struct util_staging_transfer * +struct util_staging_transfer * util_staging_transfer_new(struct pipe_context *pipe, struct pipe_resource *pt, struct pipe_subresource sr, -- cgit v1.2.3 From 39cd4f7ceb09dc3050f93d1fb326faf24c11150e Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 11 Aug 2010 22:26:57 +0200 Subject: translate_test: fix segfault on x86-64 --- src/gallium/tests/unit/translate_test.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/tests/unit/translate_test.c 
b/src/gallium/tests/unit/translate_test.c index a934a6abf4..097a5fd937 100644 --- a/src/gallium/tests/unit/translate_test.c +++ b/src/gallium/tests/unit/translate_test.c @@ -128,7 +128,7 @@ int main(int argc, char** argv) for (output_format = 1; output_format < PIPE_FORMAT_COUNT; ++output_format) { const struct util_format_description* output_format_desc = util_format_description(output_format); - unsigned output_format_size = util_format_get_stride(output_format, 1); + unsigned output_format_size; if (!output_format_desc || !output_format_desc->fetch_rgba_float || !output_format_desc->pack_rgba_float @@ -137,10 +137,12 @@ int main(int argc, char** argv) || !translate_is_output_format_supported(output_format)) continue; + output_format_size = util_format_get_stride(output_format, 1); + for (input_format = 1; input_format < PIPE_FORMAT_COUNT; ++input_format) { const struct util_format_description* input_format_desc = util_format_description(input_format); - unsigned input_format_size = util_format_get_stride(input_format, 1); + unsigned input_format_size; struct translate* translate[2]; unsigned fail = 0; unsigned used_generic = 0; @@ -153,6 +155,8 @@ int main(int argc, char** argv) || !translate_is_output_format_supported(input_format)) continue; + input_format_size = util_format_get_stride(input_format, 1); + key.element[0].input_format = input_format; key.element[0].output_format = output_format; key.output_stride = output_format_size; -- cgit v1.2.3 From e1bb9ee7a6b7dea0e0388ffb375447ad40f38499 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 11 Aug 2010 18:57:27 -0700 Subject: scons: Fix freebsd8 build. 
--- src/gallium/tests/graw/SConscript | 5 ++++- src/gallium/tests/unit/SConscript | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/tests/graw/SConscript b/src/gallium/tests/graw/SConscript index 7e39ec21a4..860a17e13e 100644 --- a/src/gallium/tests/graw/SConscript +++ b/src/gallium/tests/graw/SConscript @@ -11,9 +11,12 @@ env = env.Clone() env.Prepend(LIBPATH = [graw.dir]) env.Prepend(LIBS = ['graw'] + gallium) -if platform == 'sunos5': +if platform in ('freebsd8', 'sunos5'): env.Append(LIBS = ['m']) +if platform == 'freebsd8': + env.Append(LIBS = ['pthread']) + progs = [ 'clear', 'tri', diff --git a/src/gallium/tests/unit/SConscript b/src/gallium/tests/unit/SConscript index a0ad58ffa2..edc68e34d9 100644 --- a/src/gallium/tests/unit/SConscript +++ b/src/gallium/tests/unit/SConscript @@ -4,9 +4,12 @@ env = env.Clone() env.Prepend(LIBS = [gallium]) -if platform == 'sunos5': +if platform in ('freebsd8', 'sunos5'): env.Append(LIBS = ['m']) +if platform == 'freebsd8': + env.Append(LIBS = ['pthread']) + progs = [ 'pipe_barrier_test', 'u_cache_test', -- cgit v1.2.3 From de4784e36505316c2a5ab34cc5b371d17f38d3c5 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 12 Aug 2010 05:06:21 +0200 Subject: u_blitter: unify clear_depth_stencil and flush_depth_stencil No need to enable depth test for clear. 
--- src/gallium/auxiliary/util/u_blitter.c | 46 ---------------------------------- src/gallium/auxiliary/util/u_blitter.h | 2 -- src/gallium/drivers/r300/r300_blit.c | 4 ++- 3 files changed, 3 insertions(+), 49 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index b5b86b7214..1b9e957e3c 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -87,7 +87,6 @@ struct blitter_context_priv void *dsa_write_depth_keep_stencil; void *dsa_keep_depth_stencil; void *dsa_keep_depth_write_stencil; - void *dsa_flush_depth_stencil; void *velem_state; @@ -158,12 +157,6 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) pipe->create_depth_stencil_alpha_state(pipe, &dsa); dsa.depth.writemask = 1; - ctx->dsa_flush_depth_stencil = - pipe->create_depth_stencil_alpha_state(pipe, &dsa); - - dsa.depth.enabled = 1; - dsa.depth.writemask = 1; - dsa.depth.func = PIPE_FUNC_ALWAYS; ctx->dsa_write_depth_keep_stencil = pipe->create_depth_stencil_alpha_state(pipe, &dsa); @@ -945,42 +938,3 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, UTIL_BLITTER_ATTRIB_NONE, NULL); blitter_restore_CSOs(ctx); } - -/* Clear a region of a depth stencil surface. 
*/ -void util_blitter_flush_depth_stencil(struct blitter_context *blitter, - struct pipe_surface *dstsurf) -{ - struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; - struct pipe_context *pipe = ctx->base.pipe; - struct pipe_framebuffer_state fb_state; - - assert(dstsurf->texture); - if (!dstsurf->texture) - return; - - /* check the saved state */ - blitter_check_saved_CSOs(ctx); - assert(blitter->saved_fb_state.nr_cbufs != ~0); - - /* bind CSOs */ - pipe->bind_blend_state(pipe, ctx->blend_keep_color); - pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil); - - pipe->bind_rasterizer_state(pipe, ctx->rs_state); - pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0)); - pipe->bind_vs_state(pipe, ctx->vs_col); - pipe->bind_vertex_elements_state(pipe, ctx->velem_state); - - /* set a framebuffer state */ - fb_state.width = dstsurf->width; - fb_state.height = dstsurf->height; - fb_state.nr_cbufs = 0; - fb_state.cbufs[0] = 0; - fb_state.zsbuf = dstsurf; - pipe->set_framebuffer_state(pipe, &fb_state); - - blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height); - blitter->draw_rectangle(blitter, 0, 0, dstsurf->width, dstsurf->height, 0, - UTIL_BLITTER_ATTRIB_NONE, NULL); - blitter_restore_CSOs(ctx); -} diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index f316587dea..ba3f92eca8 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -200,8 +200,6 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, unsigned dstx, unsigned dsty, unsigned width, unsigned height); -void util_blitter_flush_depth_stencil(struct blitter_context *blitter, - struct pipe_surface *dstsurf); /* The functions below should be used to save currently bound constant state * objects inside a driver. 
The objects are automatically restored at the end * of the util_blitter_{clear, copy_region, fill_region} functions and then diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index ff52286b5c..67e8288440 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -279,7 +279,9 @@ void r300_flush_depth_stencil(struct pipe_context *pipe, PIPE_BIND_DEPTH_STENCIL); r300->z_decomp_rd = TRUE; r300_blitter_begin(r300, R300_CLEAR_SURFACE); - util_blitter_flush_depth_stencil(r300->blitter, dstsurf); + util_blitter_clear_depth_stencil(r300->blitter, dstsurf, + PIPE_CLEAR_DEPTH, 0, 0, + 0, 0, dstsurf->width, dstsurf->height); r300_blitter_end(r300); r300->z_decomp_rd = FALSE; -- cgit v1.2.3 From e2df0a8b234efde140b340c2c9b67b06b789b758 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 12 Aug 2010 16:06:40 +1000 Subject: r600g: improve texture format checker. This takes the r300g texture format checker and fixes it up for r600g. It passes the glean texSwizzle, pixelformats, and texture_srgb tests; however, I think L8S8_SRGB is broken, as is L8_SRGB, and this needs investigating. Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_context.h | 3 + src/gallium/drivers/r600/r600_state.c | 22 +-- src/gallium/drivers/r600/r600_state_inlines.h | 2 +- src/gallium/drivers/r600/r600_texture.c | 248 ++++++++++++++++++++++++++ 4 files changed, 263 insertions(+), 12 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index c606dbbda3..76d5de8653 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -175,4 +175,7 @@ extern int r600_pipe_shader_update(struct pipe_context *ctx, #define R600_ERR(fmt, args...) 
\ fprintf(stderr, "EE %s/%s:%d - "fmt, __FILE__, __func__, __LINE__, ##args) +uint32_t r600_translate_texformat(enum pipe_format format, + const unsigned char *swizzle_view, + uint32_t *word4_p, uint32_t *yuv_format_p); #endif diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index ed2d9f9984..46e8f2ae1f 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1169,8 +1169,16 @@ static struct radeon_state *r600_resource(struct r600_context *rctx, struct r600_resource *rbuffer; struct radeon_state *rstate; unsigned format; - - format = r600_translate_colorformat(view->texture->format); + uint32_t word4 = 0, yuv_format = 0; + unsigned char swizzle[4]; + + swizzle[0] = view->swizzle_r; + swizzle[1] = view->swizzle_g; + swizzle[2] = view->swizzle_b; + swizzle[3] = view->swizzle_a; + format = r600_translate_texformat(view->texture->format, + swizzle, + &word4, &yuv_format); if (format == ~0) return NULL; desc = util_format_description(view->texture->format); @@ -1204,18 +1212,10 @@ static struct radeon_state *r600_resource(struct r600_context *rctx, rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = 0; rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = tmp->offset[1] >> 8; rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD4] = - S_038010_FORMAT_COMP_X(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) | - S_038010_FORMAT_COMP_Y(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) | - S_038010_FORMAT_COMP_Z(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) | - S_038010_FORMAT_COMP_W(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) | + word4 | S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) | S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) | S_038010_REQUEST_SIZE(1) | - S_038010_DST_SEL_X(r600_tex_swizzle(view->swizzle_b)) | - S_038010_DST_SEL_Y(r600_tex_swizzle(view->swizzle_g)) | - S_038010_DST_SEL_Z(r600_tex_swizzle(view->swizzle_r)) | - S_038010_DST_SEL_W(r600_tex_swizzle(view->swizzle_a)) | - 
S_038010_FORCE_DEGAMMA(desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ? 1 : 0) | S_038010_BASE_LEVEL(view->first_level); rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD5] = S_038014_LAST_LEVEL(view->last_level) | diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 8271ad19fb..060a27cd6f 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -289,7 +289,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) static INLINE boolean r600_is_sampler_format_supported(enum pipe_format format) { - return r600_translate_colorformat(format) != ~0; + return r600_translate_texformat(format, NULL, NULL, NULL) != ~0; } static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format) diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 1bce911306..30d79ebdd6 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -33,6 +33,7 @@ #include "r600_screen.h" #include "r600_context.h" #include "r600_resource.h" +#include "r600d.h" extern struct u_resource_vtbl r600_texture_vtbl; @@ -277,3 +278,250 @@ void r600_init_screen_texture_functions(struct pipe_screen *screen) screen->get_tex_surface = r600_get_tex_surface; screen->tex_surface_destroy = r600_tex_surface_destroy; } + +static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format, + const unsigned char *swizzle_view) +{ + unsigned i; + unsigned char swizzle[4]; + unsigned result = 0; + const uint32_t swizzle_shift[4] = { + 16, 19, 22, 25, + }; + const uint32_t swizzle_bit[4] = { + 0, 1, 2, 3, + }; + + if (swizzle_view) { + /* Combine two sets of swizzles. */ + for (i = 0; i < 4; i++) { + swizzle[i] = swizzle_view[i] <= UTIL_FORMAT_SWIZZLE_W ? 
+ swizzle_format[swizzle_view[i]] : swizzle_view[i]; + } + } else { + memcpy(swizzle, swizzle_format, 4); + } + + /* Get swizzle. */ + for (i = 0; i < 4; i++) { + switch (swizzle[i]) { + case UTIL_FORMAT_SWIZZLE_Y: + result |= swizzle_bit[1] << swizzle_shift[i]; + break; + case UTIL_FORMAT_SWIZZLE_Z: + result |= swizzle_bit[2] << swizzle_shift[i]; + break; + case UTIL_FORMAT_SWIZZLE_W: + result |= swizzle_bit[3] << swizzle_shift[i]; + break; + case UTIL_FORMAT_SWIZZLE_0: + result |= V_038010_SQ_SEL_0 << swizzle_shift[i]; + break; + case UTIL_FORMAT_SWIZZLE_1: + result |= V_038010_SQ_SEL_1 << swizzle_shift[i]; + break; + default: /* UTIL_FORMAT_SWIZZLE_X */ + result |= swizzle_bit[0] << swizzle_shift[i]; + } + } + return result; +} + +/* texture format translate */ +uint32_t r600_translate_texformat(enum pipe_format format, + const unsigned char *swizzle_view, + uint32_t *word4_p, uint32_t *yuv_format_p) +{ + uint32_t result = 0, word4 = 0, yuv_format = 0; + const struct util_format_description *desc; + boolean uniform = TRUE; + int i; + const uint32_t sign_bit[4] = { + S_038010_FORMAT_COMP_X(V_038010_SQ_FORMAT_COMP_SIGNED), + S_038010_FORMAT_COMP_Y(V_038010_SQ_FORMAT_COMP_SIGNED), + S_038010_FORMAT_COMP_Z(V_038010_SQ_FORMAT_COMP_SIGNED), + S_038010_FORMAT_COMP_W(V_038010_SQ_FORMAT_COMP_SIGNED) + }; + desc = util_format_description(format); + + /* Colorspace (return non-RGB formats directly). 
*/ + switch (desc->colorspace) { + /* Depth stencil formats */ + case UTIL_FORMAT_COLORSPACE_ZS: + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + result = V_028010_DEPTH_16; + goto out_word4; + case PIPE_FORMAT_Z24X8_UNORM: + result = V_028010_DEPTH_X8_24; + goto out_word4; + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + result = V_028010_DEPTH_8_24; + goto out_word4; + default: + goto out_unknown; + } + + case UTIL_FORMAT_COLORSPACE_YUV: + yuv_format |= (1 << 30); + switch (format) { + case PIPE_FORMAT_UYVY: + case PIPE_FORMAT_YUYV: + default: + break; + } + goto out_unknown; /* TODO */ + + case UTIL_FORMAT_COLORSPACE_SRGB: + word4 |= S_038010_FORCE_DEGAMMA(1); + if (format == PIPE_FORMAT_L8A8_SRGB || format == PIPE_FORMAT_L8_SRGB) + goto out_unknown; /* fails for some reason - TODO */ + break; + + default: + break; + } + + word4 |= r600_get_swizzle_combined(desc->swizzle, swizzle_view); + + /* S3TC formats. TODO */ + if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { + goto out_unknown; + } + + + for (i = 0; i < desc->nr_channels; i++) { + if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { + word4 |= sign_bit[i]; + } + } + + /* R8G8Bx_SNORM - TODO CxV8U8 */ + + /* RGTC - TODO */ + + /* See whether the components are of the same size. */ + for (i = 1; i < desc->nr_channels; i++) { + uniform = uniform && desc->channel[0].size == desc->channel[i].size; + } + + /* Non-uniform formats. 
*/ + if (!uniform) { + switch(desc->nr_channels) { + case 3: + if (desc->channel[0].size == 5 && + desc->channel[1].size == 6 && + desc->channel[2].size == 5) { + result |= V_0280A0_COLOR_5_6_5; + goto out_word4; + } + goto out_unknown; + case 4: + if (desc->channel[0].size == 5 && + desc->channel[1].size == 5 && + desc->channel[2].size == 5 && + desc->channel[3].size == 1) { + result |= V_0280A0_COLOR_1_5_5_5; + goto out_word4; + } + if (desc->channel[0].size == 10 && + desc->channel[1].size == 10 && + desc->channel[2].size == 10 && + desc->channel[3].size == 2) { + result |= V_0280A0_COLOR_10_10_10_2; + goto out_word4; + } + goto out_unknown; + } + goto out_unknown; + } + + /* uniform formats */ + switch (desc->channel[0].type) { + case UTIL_FORMAT_TYPE_UNSIGNED: + case UTIL_FORMAT_TYPE_SIGNED: + if (!desc->channel[0].normalized && + desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) { + goto out_unknown; + } + + switch (desc->channel[0].size) { + case 4: + switch (desc->nr_channels) { + case 2: + result |= V_0280A0_COLOR_4_4; + goto out_word4; + case 4: + result |= V_0280A0_COLOR_4_4_4_4; + goto out_word4; + } + goto out_unknown; + case 8: + switch (desc->nr_channels) { + case 1: + result |= V_0280A0_COLOR_8; + goto out_word4; + case 2: + result |= V_0280A0_COLOR_8_8; + goto out_word4; + case 4: + result |= V_0280A0_COLOR_8_8_8_8; + goto out_word4; + } + goto out_unknown; + case 16: + switch (desc->nr_channels) { + case 1: + result |= V_0280A0_COLOR_16; + goto out_word4; + case 2: + result |= V_0280A0_COLOR_16_16; + goto out_word4; + case 4: + result |= V_0280A0_COLOR_16_16_16_16; + goto out_word4; + } + } + goto out_unknown; + + case UTIL_FORMAT_TYPE_FLOAT: + switch (desc->channel[0].size) { + case 16: + switch (desc->nr_channels) { + case 1: + result |= V_0280A0_COLOR_16_FLOAT; + goto out_word4; + case 2: + result |= V_0280A0_COLOR_16_16_FLOAT; + goto out_word4; + case 4: + result |= V_0280A0_COLOR_16_16_16_16_FLOAT; + goto out_word4; + } + goto out_unknown; + 
case 32: + switch (desc->nr_channels) { + case 1: + result |= V_0280A0_COLOR_32_FLOAT; + goto out_word4; + case 2: + result |= V_0280A0_COLOR_32_32_FLOAT; + goto out_word4; + case 4: + result |= V_0280A0_COLOR_32_32_32_32_FLOAT; + goto out_word4; + } + } + + } +out_word4: + if (word4_p) + *word4_p = word4; + if (yuv_format_p) + *yuv_format_p = yuv_format; +// fprintf(stderr,"returning %08x %08x %08x\n", result, word4, yuv_format); + return result; +out_unknown: +// R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); + return ~0; +} -- cgit v1.2.3 From 13bc2098ca21be3d11176e558ca71e29e41a239f Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 12 Aug 2010 16:29:04 +1000 Subject: r600g: fix provoking-vertex piglit test. --- src/gallium/drivers/r600/r600_state.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 46e8f2ae1f..b9b46d64e3 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -770,7 +770,7 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) char depth = 0; unsigned offset_db_fmt_cntl = 0; unsigned tmp; - + unsigned prov_vtx = 1; if (fb->zsbuf) { offset_units = state->offset_units; offset_scale = state->offset_scale * 12.0f; @@ -796,6 +796,9 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) } offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth); + if (state->flatshade_first) + prov_vtx = 0; + rctx->flat_shade = state->flatshade; rstate = radeon_state(rscreen->rw, R600_RASTERIZER_TYPE, R600_RASTERIZER); if (rstate == NULL) @@ -814,13 +817,14 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) } } rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0x00000000; - rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = 0x00080000 | - 
S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | - S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | - S_028814_FACE(!state->front_ccw) | - S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | - S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | - S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri); + rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = + S_028814_PROVOKING_VTX_LAST(prov_vtx) | + S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | + S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | + S_028814_FACE(!state->front_ccw) | + S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | + S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | + S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri); rstate->states[R600_RASTERIZER__PA_CL_VS_OUT_CNTL] = S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex); -- cgit v1.2.3 From 582129ced65e0a71478a8e75e00397037c4826d8 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 12 Aug 2010 16:32:19 +1000 Subject: r600g: fix typo in stencil translate. 
fixes piglit stencil-twoside and stencil-wrap --- src/gallium/drivers/r600/r600_state_inlines.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 060a27cd6f..f93c20da35 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -110,7 +110,7 @@ static INLINE uint32_t r600_translate_stencil_op(int s_op) case PIPE_STENCIL_OP_DECR: return V_028800_STENCIL_DECR; case PIPE_STENCIL_OP_INCR_WRAP: - return V_028800_STENCIL_INVERT; + return V_028800_STENCIL_INCR_WRAP; case PIPE_STENCIL_OP_DECR_WRAP: return V_028800_STENCIL_DECR_WRAP; case PIPE_STENCIL_OP_INVERT: -- cgit v1.2.3 From f668ea11bd0b1f662e0be523a4bc46835e011ffa Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 12 Aug 2010 13:34:53 +0200 Subject: Revert "u_blitter: unify clear_depth_stencil and flush_depth_stencil" This reverts commit de4784e36505316c2a5ab34cc5b371d17f38d3c5. 
--- src/gallium/auxiliary/util/u_blitter.c | 46 ++++++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_blitter.h | 2 ++ src/gallium/drivers/r300/r300_blit.c | 4 +-- 3 files changed, 49 insertions(+), 3 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 1b9e957e3c..b5b86b7214 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -87,6 +87,7 @@ struct blitter_context_priv void *dsa_write_depth_keep_stencil; void *dsa_keep_depth_stencil; void *dsa_keep_depth_write_stencil; + void *dsa_flush_depth_stencil; void *velem_state; @@ -157,6 +158,12 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) pipe->create_depth_stencil_alpha_state(pipe, &dsa); dsa.depth.writemask = 1; + ctx->dsa_flush_depth_stencil = + pipe->create_depth_stencil_alpha_state(pipe, &dsa); + + dsa.depth.enabled = 1; + dsa.depth.writemask = 1; + dsa.depth.func = PIPE_FUNC_ALWAYS; ctx->dsa_write_depth_keep_stencil = pipe->create_depth_stencil_alpha_state(pipe, &dsa); @@ -938,3 +945,42 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, UTIL_BLITTER_ATTRIB_NONE, NULL); blitter_restore_CSOs(ctx); } + +/* Clear a region of a depth stencil surface. 
*/ +void util_blitter_flush_depth_stencil(struct blitter_context *blitter, + struct pipe_surface *dstsurf) +{ + struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; + struct pipe_context *pipe = ctx->base.pipe; + struct pipe_framebuffer_state fb_state; + + assert(dstsurf->texture); + if (!dstsurf->texture) + return; + + /* check the saved state */ + blitter_check_saved_CSOs(ctx); + assert(blitter->saved_fb_state.nr_cbufs != ~0); + + /* bind CSOs */ + pipe->bind_blend_state(pipe, ctx->blend_keep_color); + pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil); + + pipe->bind_rasterizer_state(pipe, ctx->rs_state); + pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0)); + pipe->bind_vs_state(pipe, ctx->vs_col); + pipe->bind_vertex_elements_state(pipe, ctx->velem_state); + + /* set a framebuffer state */ + fb_state.width = dstsurf->width; + fb_state.height = dstsurf->height; + fb_state.nr_cbufs = 0; + fb_state.cbufs[0] = 0; + fb_state.zsbuf = dstsurf; + pipe->set_framebuffer_state(pipe, &fb_state); + + blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height); + blitter->draw_rectangle(blitter, 0, 0, dstsurf->width, dstsurf->height, 0, + UTIL_BLITTER_ATTRIB_NONE, NULL); + blitter_restore_CSOs(ctx); +} diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index ba3f92eca8..f316587dea 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -200,6 +200,8 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, unsigned dstx, unsigned dsty, unsigned width, unsigned height); +void util_blitter_flush_depth_stencil(struct blitter_context *blitter, + struct pipe_surface *dstsurf); /* The functions below should be used to save currently bound constant state * objects inside a driver. 
The objects are automatically restored at the end * of the util_blitter_{clear, copy_region, fill_region} functions and then diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 67e8288440..ff52286b5c 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -279,9 +279,7 @@ void r300_flush_depth_stencil(struct pipe_context *pipe, PIPE_BIND_DEPTH_STENCIL); r300->z_decomp_rd = TRUE; r300_blitter_begin(r300, R300_CLEAR_SURFACE); - util_blitter_clear_depth_stencil(r300->blitter, dstsurf, - PIPE_CLEAR_DEPTH, 0, 0, - 0, 0, dstsurf->width, dstsurf->height); + util_blitter_flush_depth_stencil(r300->blitter, dstsurf); r300_blitter_end(r300); r300->z_decomp_rd = FALSE; -- cgit v1.2.3 From 4d946c4e8adf3f0ac447b6a9a6caf17392b816cd Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Thu, 12 Aug 2010 18:18:41 +0200 Subject: translate_test: improve 1. Generate random data specifically for float and doubles, so that they end up in [0, 1] range 2. Don't test useless conversions like SCALED <-> NORM 3. 
Poison the buffers before testing --- src/gallium/tests/unit/translate_test.c | 66 ++++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 6 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/tests/unit/translate_test.c b/src/gallium/tests/unit/translate_test.c index 097a5fd937..a6f0f54897 100644 --- a/src/gallium/tests/unit/translate_test.c +++ b/src/gallium/tests/unit/translate_test.c @@ -37,7 +37,11 @@ int main(int argc, char** argv) struct translate_key key; unsigned output_format; unsigned input_format; + unsigned buffer_size = 4096; unsigned char* buffer[5]; + unsigned char* byte_buffer; + float* float_buffer; + double* double_buffer; unsigned count = 4; unsigned i, j, k; unsigned passed = 0; @@ -111,8 +115,12 @@ int main(int argc, char** argv) return 2; } - for (i = 0; i < Elements(buffer); ++i) - buffer[i] = align_malloc(4096, 4096); + for (i = 1; i < Elements(buffer); ++i) + buffer[i] = align_malloc(buffer_size, 4096); + + byte_buffer = align_malloc(buffer_size, 4096); + float_buffer = align_malloc(buffer_size, 4096); + double_buffer = align_malloc(buffer_size, 4096); key.nr_elements = 1; key.element[0].input_buffer = 0; @@ -121,14 +129,24 @@ int main(int argc, char** argv) key.element[0].type = TRANSLATE_ELEMENT_NORMAL; key.element[0].instance_divisor = 0; - srand(4359025); - for (i = 0; i < 4096; ++i) - buffer[0][i] = rand() & 0x7f; /* avoid negative values that work badly when converted to unsigned format*/ + srand48(4359025); + + /* avoid negative values that work badly when converted to unsigned format*/ + for (i = 0; i < buffer_size / sizeof(unsigned); ++i) + ((unsigned*)byte_buffer)[i] = mrand48() & 0x7f7f7f7f; + + for (i = 0; i < buffer_size / sizeof(float); ++i) + float_buffer[i] = (float)drand48(); + + for (i = 0; i < buffer_size / sizeof(double); ++i) + double_buffer[i] = drand48(); for (output_format = 1; output_format < PIPE_FORMAT_COUNT; ++output_format) { const struct util_format_description* output_format_desc = 
util_format_description(output_format); unsigned output_format_size; + unsigned output_normalized = 0; + if (!output_format_desc || !output_format_desc->fetch_rgba_float || !output_format_desc->pack_rgba_float @@ -137,6 +155,12 @@ int main(int argc, char** argv) || !translate_is_output_format_supported(output_format)) continue; + for(i = 0; i < output_format_desc->nr_channels; ++i) + { + if(output_format_desc->channel[i].type != UTIL_FORMAT_TYPE_FLOAT) + output_normalized |= (1 << output_format_desc->channel[i].normalized); + } + output_format_size = util_format_get_stride(output_format, 1); for (input_format = 1; input_format < PIPE_FORMAT_COUNT; ++input_format) @@ -146,6 +170,8 @@ int main(int argc, char** argv) struct translate* translate[2]; unsigned fail = 0; unsigned used_generic = 0; + unsigned input_normalized = 0; + boolean input_is_float = FALSE; if (!input_format_desc || !input_format_desc->fetch_rgba_float @@ -157,6 +183,22 @@ int main(int argc, char** argv) input_format_size = util_format_get_stride(input_format, 1); + for(i = 0; i < input_format_desc->nr_channels; ++i) + { + if(input_format_desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) + { + input_is_float = 1; + input_normalized |= 1 << 1; + } + else + input_normalized |= (1 << input_format_desc->channel[i].normalized); + } + + if(((input_normalized | output_normalized) == 3) + || ((input_normalized & 1) && (output_normalized & 1) + && input_format_size * output_format_desc->nr_channels > output_format_size * input_format_desc->nr_channels)) + continue; + key.element[0].input_format = input_format; key.element[0].output_format = output_format; key.output_stride = output_format_size; @@ -176,6 +218,18 @@ int main(int argc, char** argv) continue; } + for(i = 1; i < 5; ++i) + memset(buffer[i], 0xcd - (0x22 * i), 4096); + + if(input_is_float && input_format_desc->channel[0].size == 32) + buffer[0] = (unsigned char*)float_buffer; + else if(input_is_float && input_format_desc->channel[0].size == 64) + 
buffer[0] = (unsigned char*)double_buffer; + else if(input_is_float) + abort(); + else + buffer[0] = byte_buffer; + translate[0]->set_buffer(translate[0], 0, buffer[0], input_format_size, ~0); translate[0]->run(translate[0], 0, count, 0, buffer[1]); translate[1]->set_buffer(translate[1], 0, buffer[1], output_format_size, ~0); @@ -208,7 +262,7 @@ int main(int argc, char** argv) used_generic ? "[GENERIC]" : "", input_format_desc->name, output_format_desc->name, input_format_desc->name, output_format_desc->name, input_format_desc->name); - if (fail) + if (1) { for (i = 0; i < Elements(buffer); ++i) { -- cgit v1.2.3 From b9abe7f62c09c0395214b9447032323b3846b2cf Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Thu, 12 Aug 2010 22:07:37 +0200 Subject: translate_test: fix compilation on non-POSIX platforms Use a kludgy function based on rand() instead of drand48() --- src/gallium/tests/unit/translate_test.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/tests/unit/translate_test.c b/src/gallium/tests/unit/translate_test.c index a6f0f54897..960c70f2b5 100644 --- a/src/gallium/tests/unit/translate_test.c +++ b/src/gallium/tests/unit/translate_test.c @@ -30,6 +30,20 @@ #include #include +/* don't use this for serious use */ +static double rand_double() +{ + const double rm = (double)RAND_MAX + 1; + double div = 1; + double v = 0; + for(unsigned i = 0; i < 4; ++i) + { + div *= rm; + v += (double)rand() / div; + } + return v; +} + int main(int argc, char** argv) { struct translate *(*create_fn)(const struct translate_key *key) = 0; @@ -129,17 +143,17 @@ int main(int argc, char** argv) key.element[0].type = TRANSLATE_ELEMENT_NORMAL; key.element[0].instance_divisor = 0; - srand48(4359025); + srand(4359025); /* avoid negative values that work badly when converted to unsigned format*/ - for (i = 0; i < buffer_size / sizeof(unsigned); ++i) - ((unsigned*)byte_buffer)[i] = mrand48() & 0x7f7f7f7f; + 
for (i = 0; i < buffer_size; ++i) + byte_buffer[i] = rand() & 0x7f7f7f7f; for (i = 0; i < buffer_size / sizeof(float); ++i) - float_buffer[i] = (float)drand48(); + float_buffer[i] = (float)rand_double(); for (i = 0; i < buffer_size / sizeof(double); ++i) - double_buffer[i] = drand48(); + double_buffer[i] = rand_double(); for (output_format = 1; output_format < PIPE_FORMAT_COUNT; ++output_format) { -- cgit v1.2.3 From 26c042c30aa3a2ec7eaf1a2b766bc4fd2bfcdb84 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Thu, 12 Aug 2010 17:12:56 -0700 Subject: translate_test: Fix compilation with MSVC. --- src/gallium/tests/unit/translate_test.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/tests/unit/translate_test.c b/src/gallium/tests/unit/translate_test.c index 960c70f2b5..d0946a91a2 100644 --- a/src/gallium/tests/unit/translate_test.c +++ b/src/gallium/tests/unit/translate_test.c @@ -36,7 +36,8 @@ static double rand_double() const double rm = (double)RAND_MAX + 1; double div = 1; double v = 0; - for(unsigned i = 0; i < 4; ++i) + unsigned i; + for(i = 0; i < 4; ++i) { div *= rm; v += (double)rand() / div; -- cgit v1.2.3 From f2804e70623c4c71e5758a24d695f8d6b74bf6d7 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 13 Aug 2010 10:16:29 +1000 Subject: r600g: fix memory leaks running gears. I noticed gears memory usage was heading skywards, some r600 "states" aren't properly refcounted, and the ctx->state is never freed. 
--- src/gallium/drivers/r600/r600_state.c | 5 +++++ src/gallium/winsys/r600/drm/radeon_ctx.c | 1 + 2 files changed, 6 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index b9b46d64e3..3efd409ae0 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -379,6 +379,8 @@ static void r600_set_scissor_state(struct pipe_context *ctx, rstate = r600_context_state(rctx, pipe_scissor_type, state); r600_bind_state(ctx, rstate); + /* refcount is taken care of this */ + r600_delete_state(ctx, rstate); } static void r600_set_stencil_ref(struct pipe_context *ctx, @@ -389,6 +391,8 @@ static void r600_set_stencil_ref(struct pipe_context *ctx, rstate = r600_context_state(rctx, pipe_stencil_ref_type, state); r600_bind_state(ctx, rstate); + /* refcount is taken care of this */ + r600_delete_state(ctx, rstate); } static void r600_set_vertex_buffers(struct pipe_context *ctx, @@ -433,6 +437,7 @@ static void r600_set_viewport_state(struct pipe_context *ctx, rstate = r600_context_state(rctx, pipe_viewport_type, state); r600_bind_state(ctx, rstate); + r600_delete_state(ctx, rstate); } void r600_init_state_functions(struct r600_context *rctx) diff --git a/src/gallium/winsys/r600/drm/radeon_ctx.c b/src/gallium/winsys/r600/drm/radeon_ctx.c index ff70ce6de7..45b706bb0f 100644 --- a/src/gallium/winsys/r600/drm/radeon_ctx.c +++ b/src/gallium/winsys/r600/drm/radeon_ctx.c @@ -112,6 +112,7 @@ struct radeon_ctx *radeon_ctx_decref(struct radeon_ctx *ctx) ctx->bo[i] = radeon_bo_decref(ctx->radeon, ctx->bo[i]); } ctx->radeon = radeon_decref(ctx->radeon); + free(ctx->state); free(ctx->draw); free(ctx->bo); free(ctx->pm4); -- cgit v1.2.3 From 527ac905f6a1cb525ba701df4ca505e1baea6378 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 13 Aug 2010 09:12:49 +1000 Subject: r600g: fix warning in the winsys --- src/gallium/winsys/r600/drm/r600_drm.c | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index 9520792f54..6ebbae0b3f 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -45,7 +45,7 @@ boolean r600_buffer_get_handle(struct radeon *rw, struct winsys_handle *whandle) { struct drm_gem_flink flink; - struct r600_resource* rbuffer = (struct r600_buffer*)buf; + struct r600_resource* rbuffer = (struct r600_resource*)buf; if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { if (!rbuffer->flink) { -- cgit v1.2.3 From 16e782b83f298fcf82bbb277690ac0c713e90e21 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 13 Aug 2010 02:58:51 +0200 Subject: r300g: disable depth clamp for now It breaks Regnum Online in that it renders random triangles all over the screen. https://bugs.freedesktop.org/show_bug.cgi?id=29518 --- src/gallium/drivers/r300/r300_screen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 6268001054..d1b8e9eed4 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -115,7 +115,6 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_MIRROR_REPEAT: case PIPE_CAP_BLEND_EQUATION_SEPARATE: case PIPE_CAP_TEXTURE_SWIZZLE: - case PIPE_CAP_DEPTH_CLAMP: return 1; /* Unsupported features (boolean caps). */ @@ -124,6 +123,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_CONT_SUPPORTED: case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_INDEP_BLEND_FUNC: + case PIPE_CAP_DEPTH_CLAMP: /* XXX implemented, but breaks Regnum Online */ return 0; /* Texturing. 
*/ -- cgit v1.2.3 From 4985ce1755c5d6f4fa6281150ae0bf7df594146f Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 13 Aug 2010 03:27:45 +0200 Subject: r600g: update shader caps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sent on ML by Владимир. These values are what fglrx returns. --- src/gallium/drivers/r600/r600_screen.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_screen.c b/src/gallium/drivers/r600/r600_screen.c index 6ec842d591..cdaca9ed7d 100644 --- a/src/gallium/drivers/r600/r600_screen.c +++ b/src/gallium/drivers/r600/r600_screen.c @@ -107,29 +107,37 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) /* Shader limits. */ case PIPE_CAP_MAX_VS_INSTRUCTIONS: + return 16384; //max native instructions, not greater than max instructions case PIPE_CAP_MAX_VS_ALU_INSTRUCTIONS: case PIPE_CAP_MAX_VS_TEX_INSTRUCTIONS: case PIPE_CAP_MAX_VS_TEX_INDIRECTIONS: + return 16384; case PIPE_CAP_MAX_FS_INSTRUCTIONS: + return 16384; //max program native instructions case PIPE_CAP_MAX_FS_ALU_INSTRUCTIONS: + return 16384; //max program native ALU instructions case PIPE_CAP_MAX_FS_TEX_INSTRUCTIONS: + return 16384; //max program native texture instructions case PIPE_CAP_MAX_FS_TEX_INDIRECTIONS: - return 8192; + return 2048; //max program native texture indirections case PIPE_CAP_MAX_VS_CONTROL_FLOW_DEPTH: case PIPE_CAP_MAX_FS_CONTROL_FLOW_DEPTH: return 8; /* FIXME */ case PIPE_CAP_MAX_VS_INPUTS: + return 16; //max native attributes case PIPE_CAP_MAX_FS_INPUTS: - return 32; + return 10; //max native attributes case PIPE_CAP_MAX_VS_TEMPS: + return 256; //max native temporaries case PIPE_CAP_MAX_FS_TEMPS: - return 128; + return 256; //max native temporaries case PIPE_CAP_MAX_VS_ADDRS: case PIPE_CAP_MAX_FS_ADDRS: - return 1; /* FIXME Isn't this equal to TEMPS? 
*/ + return 1; //max native address registers/* FIXME Isn't this equal to TEMPS? */ case PIPE_CAP_MAX_VS_CONSTS: + return 256; //max native parameters case PIPE_CAP_MAX_FS_CONSTS: - return 256; /* FIXME I believe this should be much higher. */ + return 256; //max program native parameters case PIPE_CAP_MAX_CONST_BUFFERS: return 1; case PIPE_CAP_MAX_CONST_BUFFER_SIZE: /* in bytes */ -- cgit v1.2.3 From aa6bdd38af344f7f2ea9956d2ed415f26e8f07cb Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 13 Aug 2010 04:34:38 +0200 Subject: r300g: fix fastfill when color and Z clear are invoked separately This always restores the previous depth clear value after CBZB clear. --- src/gallium/drivers/r300/r300_blit.c | 2 +- src/gallium/drivers/r300/r300_state.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index ff52286b5c..5fe9b9017d 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -176,7 +176,7 @@ static void r300_clear(struct pipe_context* pipe, uint32_t width = fb->width; uint32_t height = fb->height; boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); - uint32_t hyperz_dcv = 0; + uint32_t hyperz_dcv = hyperz->zb_depthclearvalue; /* Enable fast Z clear. * The zbuffer must be in micro-tiled mode, otherwise it locks up. */ diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index e62a33daeb..8abc65a2b8 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -796,7 +796,7 @@ static void r300->z_compression, r300->z_fastfill, tex->zmask_mem[level] ? tex->zmask_mem[level]->ofs : 0xdeadbeef); } - + /* Polygon offset depends on the zbuffer bit depth. 
*/ if (r300->zbuffer_bpp != zbuffer_bpp) { r300->zbuffer_bpp = zbuffer_bpp; -- cgit v1.2.3 From bac59b336b7910be8040a3bf98be628775a67af6 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 13 Aug 2010 08:16:59 +0200 Subject: r300g: do not support separate depth/stencil clear in the driver It doesn't work well with Hyper-Z, so put the burden on the state tracker. --- src/gallium/drivers/r300/r300_screen.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index d1b8e9eed4..1e4edcdbc3 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -124,6 +124,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_INDEP_BLEND_FUNC: case PIPE_CAP_DEPTH_CLAMP: /* XXX implemented, but breaks Regnum Online */ + case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: return 0; /* Texturing. */ @@ -150,9 +151,6 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_MAX_CONST_BUFFER_SIZE: return 256; - case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: - return 1; - /* Fragment coordinate conventions. 
*/ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: -- cgit v1.2.3 From d3758feb6b38292d90f4697fa41ea12dce563d5a Mon Sep 17 00:00:00 2001 From: George Sapountzis Date: Wed, 11 Aug 2010 23:18:55 +0300 Subject: st/dri: dri_drawable_get_format is shared between DRI2 and DRISW --- src/gallium/state_trackers/dri/drm/dri2.c | 33 ++----------------------------- 1 file changed, 2 insertions(+), 31 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c index 1fb8996337..2e1c696c0f 100644 --- a/src/gallium/state_trackers/dri/drm/dri2.c +++ b/src/gallium/state_trackers/dri/drm/dri2.c @@ -117,35 +117,6 @@ static const __DRItexBufferExtension dri2TexBufferExtension = { dri2_set_tex_buffer2, }; -/** - * Get the format and binding of an attachment. - */ -static INLINE void -dri2_drawable_get_format(struct dri_drawable *drawable, - enum st_attachment_type statt, - enum pipe_format *format, - unsigned *bind) -{ - switch (statt) { - case ST_ATTACHMENT_FRONT_LEFT: - case ST_ATTACHMENT_BACK_LEFT: - case ST_ATTACHMENT_FRONT_RIGHT: - case ST_ATTACHMENT_BACK_RIGHT: - *format = drawable->stvis.color_format; - *bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; - break; - case ST_ATTACHMENT_DEPTH_STENCIL: - *format = drawable->stvis.depth_stencil_format; - *bind = PIPE_BIND_DEPTH_STENCIL; /* XXX sampler? */ - break; - default: - *format = PIPE_FORMAT_NONE; - *bind = 0; - break; - } -} - - /** * Retrieve __DRIbuffer from the DRI loader. 
*/ @@ -176,7 +147,7 @@ dri2_drawable_get_buffers(struct dri_drawable *drawable, unsigned bind; int att, bpp; - dri2_drawable_get_format(drawable, statts[i], &format, &bind); + dri_drawable_get_format(drawable, statts[i], &format, &bind); if (format == PIPE_FORMAT_NONE) continue; @@ -318,7 +289,7 @@ dri2_drawable_process_buffers(struct dri_drawable *drawable, break; } - dri2_drawable_get_format(drawable, statt, &format, &bind); + dri_drawable_get_format(drawable, statt, &format, &bind); if (statt == ST_ATTACHMENT_INVALID || format == PIPE_FORMAT_NONE) continue; -- cgit v1.2.3 From 5270deaab682c938e360f0d3b52f35b4027db858 Mon Sep 17 00:00:00 2001 From: nobled Date: Wed, 11 Aug 2010 23:36:52 +0300 Subject: st/dri: Fix segmentation fault in sw drivers --- src/gallium/state_trackers/dri/sw/drisw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/state_trackers/dri/sw/drisw.c b/src/gallium/state_trackers/dri/sw/drisw.c index ae96f1b20e..249ccd7fcf 100644 --- a/src/gallium/state_trackers/dri/sw/drisw.c +++ b/src/gallium/state_trackers/dri/sw/drisw.c @@ -201,7 +201,7 @@ drisw_allocate_textures(struct dri_drawable *drawable, struct pipe_resource templ; unsigned width, height; boolean resized; - int i; + unsigned i; width = drawable->dPriv->w; height = drawable->dPriv->h; @@ -222,7 +222,7 @@ drisw_allocate_textures(struct dri_drawable *drawable, templ.depth0 = 1; templ.last_level = 0; - for (i = 0; i < ST_ATTACHMENT_COUNT; i++) { + for (i = 0; i < count; i++) { enum pipe_format format; unsigned bind; -- cgit v1.2.3 From c14b4371ed58859d264b7b2581cfedc9cfd8401f Mon Sep 17 00:00:00 2001 From: George Sapountzis Date: Wed, 11 Aug 2010 23:26:14 +0300 Subject: st/dri: move TFP code to dri_drawable.c This is based on a patch by nobled and allows the TFP extension to be enabled for DRISW also. 
This patch does not enable TFP for DRISW though, because testing on xephyr segfaults here (for both classic and gallium): Program received signal SIGSEGV, Segmentation fault. 0x00786a4a in _mesa_GenTextures (n=1, textures=0xbfffee4c) at main/texobj.c:788 788 ASSERT_OUTSIDE_BEGIN_END(ctx); (gdb) (gdb) where \#0 0x00786a4a in _mesa_GenTextures (n=1, textures=0xbfffee4c) at main/texobj.c:788 \#1 0x0817a004 in __glXDisp_GenTextures () \#2 0x08168498 in __glXDispatch () \#3 0x0808b6ce in Dispatch () \#4 0x08084435 in main () The TFP code is generic except for the teximage call. We need to verify that DRISW correclty implements whatever hook teximage finally calls. --- .../state_trackers/dri/common/dri_drawable.c | 52 +++++++++++++++++++++ .../state_trackers/dri/common/dri_drawable.h | 2 + src/gallium/state_trackers/dri/drm/dri2.c | 53 +--------------------- 3 files changed, 55 insertions(+), 52 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.c b/src/gallium/state_trackers/dri/common/dri_drawable.c index c67ca2224d..fb07b45a2b 100644 --- a/src/gallium/state_trackers/dri/common/dri_drawable.c +++ b/src/gallium/state_trackers/dri/common/dri_drawable.c @@ -30,6 +30,7 @@ */ #include "dri_screen.h" +#include "dri_context.h" #include "dri_drawable.h" #include "pipe/p_screen.h" @@ -184,6 +185,57 @@ dri_drawable_validate_att(struct dri_drawable *drawable, drawable->base.validate(&drawable->base, statts, count, NULL); } +/** + * These are used for GLX_EXT_texture_from_pixmap + */ +static void +dri_set_tex_buffer2(__DRIcontext *pDRICtx, GLint target, + GLint format, __DRIdrawable *dPriv) +{ + struct dri_context *ctx = dri_context(pDRICtx); + struct dri_drawable *drawable = dri_drawable(dPriv); + struct pipe_resource *pt; + + dri_drawable_validate_att(drawable, ST_ATTACHMENT_FRONT_LEFT); + + pt = drawable->textures[ST_ATTACHMENT_FRONT_LEFT]; + + if (pt) { + enum pipe_format internal_format = pt->format; + + if (format == 
__DRI_TEXTURE_FORMAT_RGB) { + /* only need to cover the formats recognized by dri_fill_st_visual */ + switch (internal_format) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + internal_format = PIPE_FORMAT_B8G8R8X8_UNORM; + break; + case PIPE_FORMAT_A8R8G8B8_UNORM: + internal_format = PIPE_FORMAT_X8R8G8B8_UNORM; + break; + default: + break; + } + } + + ctx->st->teximage(ctx->st, + (target == GL_TEXTURE_2D) ? ST_TEXTURE_2D : ST_TEXTURE_RECT, + 0, internal_format, pt, FALSE); + } +} + +static void +dri_set_tex_buffer(__DRIcontext *pDRICtx, GLint target, + __DRIdrawable *dPriv) +{ + dri_set_tex_buffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv); +} + +const __DRItexBufferExtension driTexBufferExtension = { + { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION }, + dri_set_tex_buffer, + dri_set_tex_buffer2, +}; + /** * Get the format and binding of an attachment. */ diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.h b/src/gallium/state_trackers/dri/common/dri_drawable.h index 3f2e24fc15..8d108cc415 100644 --- a/src/gallium/state_trackers/dri/common/dri_drawable.h +++ b/src/gallium/state_trackers/dri/common/dri_drawable.h @@ -93,6 +93,8 @@ void dri_drawable_validate_att(struct dri_drawable *drawable, enum st_attachment_type statt); +extern const __DRItexBufferExtension driTexBufferExtension; + #endif /* vim: set sw=3 ts=8 sts=3 expandtab: */ diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c index 2e1c696c0f..47005c17e2 100644 --- a/src/gallium/state_trackers/dri/drm/dri2.c +++ b/src/gallium/state_trackers/dri/drm/dri2.c @@ -66,57 +66,6 @@ static const __DRI2flushExtension dri2FlushExtension = { dri2_invalidate_drawable, }; -/** - * These are used for GLX_EXT_texture_from_pixmap - */ -static void -dri2_set_tex_buffer2(__DRIcontext *pDRICtx, GLint target, - GLint format, __DRIdrawable *dPriv) -{ - struct dri_context *ctx = dri_context(pDRICtx); - struct dri_drawable *drawable = dri_drawable(dPriv); - struct 
pipe_resource *pt; - - dri_drawable_validate_att(drawable, ST_ATTACHMENT_FRONT_LEFT); - - pt = drawable->textures[ST_ATTACHMENT_FRONT_LEFT]; - - if (pt) { - enum pipe_format internal_format = pt->format; - - if (format == __DRI_TEXTURE_FORMAT_RGB) { - /* only need to cover the formats recognized by dri_fill_st_visual */ - switch (internal_format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - internal_format = PIPE_FORMAT_B8G8R8X8_UNORM; - break; - case PIPE_FORMAT_A8R8G8B8_UNORM: - internal_format = PIPE_FORMAT_X8R8G8B8_UNORM; - break; - default: - break; - } - } - - ctx->st->teximage(ctx->st, - (target == GL_TEXTURE_2D) ? ST_TEXTURE_2D : ST_TEXTURE_RECT, - 0, internal_format, pt, FALSE); - } -} - -static void -dri2_set_tex_buffer(__DRIcontext *pDRICtx, GLint target, - __DRIdrawable *dPriv) -{ - dri2_set_tex_buffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv); -} - -static const __DRItexBufferExtension dri2TexBufferExtension = { - { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION }, - dri2_set_tex_buffer, - dri2_set_tex_buffer2, -}; - /** * Retrieve __DRIbuffer from the DRI loader. 
*/ @@ -454,7 +403,7 @@ static const __DRIextension *dri_screen_extensions[] = { &driCopySubBufferExtension.base, &driSwapControlExtension.base, &driMediaStreamCounterExtension.base, - &dri2TexBufferExtension.base, + &driTexBufferExtension.base, &dri2FlushExtension.base, &dri2ImageExtension.base, &dri2ConfigQueryExtension.base, -- cgit v1.2.3 From 5c0a0f219843fa283a0fd98dbe0544e8ea820c60 Mon Sep 17 00:00:00 2001 From: George Sapountzis Date: Thu, 12 Aug 2010 00:03:56 +0300 Subject: st/dri: make dri_drawable_validate_att static --- src/gallium/state_trackers/dri/common/dri_drawable.c | 5 ++--- src/gallium/state_trackers/dri/common/dri_drawable.h | 4 ---- 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.c b/src/gallium/state_trackers/dri/common/dri_drawable.c index fb07b45a2b..1bdfdccf43 100644 --- a/src/gallium/state_trackers/dri/common/dri_drawable.c +++ b/src/gallium/state_trackers/dri/common/dri_drawable.c @@ -158,9 +158,9 @@ dri_destroy_buffer(__DRIdrawable * dPriv) /** * Validate the texture at an attachment. Allocate the texture if it does not - * exist. + * exist. Used by the TFP extension. 
*/ -void +static void dri_drawable_validate_att(struct dri_drawable *drawable, enum st_attachment_type statt) { @@ -181,7 +181,6 @@ dri_drawable_validate_att(struct dri_drawable *drawable, drawable->texture_stamp = drawable->dPriv->lastStamp - 1; - /* this calles into the manager */ drawable->base.validate(&drawable->base, statts, count, NULL); } diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.h b/src/gallium/state_trackers/dri/common/dri_drawable.h index 8d108cc415..74e662d36c 100644 --- a/src/gallium/state_trackers/dri/common/dri_drawable.h +++ b/src/gallium/state_trackers/dri/common/dri_drawable.h @@ -89,10 +89,6 @@ dri_drawable_get_format(struct dri_drawable *drawable, enum pipe_format *format, unsigned *bind); -void -dri_drawable_validate_att(struct dri_drawable *drawable, - enum st_attachment_type statt); - extern const __DRItexBufferExtension driTexBufferExtension; #endif -- cgit v1.2.3 From eacb624a4a11867427955c812e64c00d5c82bcdd Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 14 Aug 2010 18:02:47 +0100 Subject: gallivm: Refactor the Newton-Rapshon steps, and disable once again. It causes a very ugly corruption on the Earth's halo on Google Earth. --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 111 +++++++++++++++++++++------- 1 file changed, 83 insertions(+), 28 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 816ee70119..7b35dd4bb4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2009 VMware, Inc. + * Copyright 2009-2010 VMware, Inc. * All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a @@ -59,6 +59,19 @@ #include "lp_bld_arit.h" +/* + * XXX: Increasing eliminates some artifacts, but adds others, most + * noticeably corruption in the Earth halo in Google Earth. + */ +#define RCP_NEWTON_STEPS 0 + +#define RSQRT_NEWTON_STEPS 0 + +#define EXP_POLY_DEGREE 3 + +#define LOG_POLY_DEGREE 5 + + /** * Generate min(a, b) * No checks for special case values of a or b = 1 or 0 are done. @@ -1248,6 +1261,31 @@ lp_build_sqrt(struct lp_build_context *bld, } +/** + * Do one Newton-Raphson step to improve reciprocate precision: + * + * x_{i+1} = x_i * (2 - a * x_i) + * + * See also: + * - http://en.wikipedia.org/wiki/Division_(digital)#Newton.E2.80.93Raphson_division + * - http://softwarecommunity.intel.com/articles/eng/1818.htm + */ +static INLINE LLVMValueRef +lp_build_rcp_refine(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef rcp_a) +{ + LLVMValueRef two = lp_build_const_vec(bld->type, 2.0); + LLVMValueRef res; + + res = LLVMBuildFMul(bld->builder, a, rcp_a, ""); + res = LLVMBuildFSub(bld->builder, two, res, ""); + res = LLVMBuildFMul(bld->builder, rcp_a, res, ""); + + return res; +} + + LLVMValueRef lp_build_rcp(struct lp_build_context *bld, LLVMValueRef a) @@ -1269,38 +1307,49 @@ lp_build_rcp(struct lp_build_context *bld, return LLVMConstFDiv(bld->one, a); if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) { - /* - * XXX: Added precision is not always necessary, so only enable this - * when we have a better system in place to track minimum precision. 
- */ - -#if 1 - /* - * Do one Newton-Raphson step to improve precision: - * - * x1 = (2 - a * rcp(a)) * rcp(a) - */ - - LLVMValueRef two = lp_build_const_vec(bld->type, 2.0); - LLVMValueRef rcp_a; LLVMValueRef res; + unsigned i; - rcp_a = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a); + res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", bld->vec_type, a); - res = LLVMBuildFMul(bld->builder, a, rcp_a, ""); - res = LLVMBuildFSub(bld->builder, two, res, ""); - res = LLVMBuildFMul(bld->builder, res, rcp_a, ""); + for (i = 0; i < RCP_NEWTON_STEPS; ++i) { + res = lp_build_rcp_refine(bld, a, res); + } return res; -#else - return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a); -#endif } return LLVMBuildFDiv(bld->builder, bld->one, a, ""); } +/** + * Do one Newton-Raphson step to improve rsqrt precision: + * + * x_{i+1} = 0.5 * x_i * (3.0 - a * x_i * x_i) + * + * See also: + * - http://softwarecommunity.intel.com/articles/eng/1818.htm + */ +static INLINE LLVMValueRef +lp_build_rsqrt_refine(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef rsqrt_a) +{ + LLVMValueRef half = lp_build_const_vec(bld->type, 0.5); + LLVMValueRef three = lp_build_const_vec(bld->type, 3.0); + LLVMValueRef res; + + res = LLVMBuildFMul(bld->builder, rsqrt_a, rsqrt_a, ""); + res = LLVMBuildFMul(bld->builder, a, res, ""); + res = LLVMBuildFSub(bld->builder, three, res, ""); + res = LLVMBuildFMul(bld->builder, rsqrt_a, res, ""); + res = LLVMBuildFMul(bld->builder, half, res, ""); + + return res; +} + + /** * Generate 1/sqrt(a) */ @@ -1314,8 +1363,18 @@ lp_build_rsqrt(struct lp_build_context *bld, assert(type.floating); - if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) - return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", lp_build_vec_type(type), a); + if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) { + LLVMValueRef res; + unsigned i; + 
+ res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", bld->vec_type, a); + + for (i = 0; i < RSQRT_NEWTON_STEPS; ++i) { + res = lp_build_rsqrt_refine(bld, a, res); + } + + return res; + } return lp_build_rcp(bld, lp_build_sqrt(bld, a)); } @@ -1821,10 +1880,6 @@ lp_build_log(struct lp_build_context *bld, } -#define EXP_POLY_DEGREE 3 -#define LOG_POLY_DEGREE 5 - - /** * Generate polynomial. * Ex: coeffs[0] + x * coeffs[1] + x^2 * coeffs[2]. -- cgit v1.2.3 From 923256626931c057d1a7c20d8900768b0c1faea9 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Fri, 13 Aug 2010 15:26:29 +0200 Subject: u_cpu_detect: remove arch and little_endian This logic duplicates the one in p_config.h, so remove it and adjust the only two places that were using it. --- src/gallium/auxiliary/gallivm/lp_bld_pack.c | 7 +++---- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 6 +++++- src/gallium/auxiliary/util/u_cpu_detect.c | 18 ------------------ src/gallium/auxiliary/util/u_cpu_detect.h | 13 +------------ 4 files changed, 9 insertions(+), 35 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c index ecfb13a0d4..b7b630f2e8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c @@ -171,14 +171,13 @@ lp_build_unpack2(LLVMBuilderRef builder, msb = lp_build_zero(src_type); /* Interleave bits */ - if(util_cpu_caps.little_endian) { +#ifdef PIPE_ARCH_LITTLE_ENDIAN *dst_lo = lp_build_interleave2(builder, src_type, src, msb, 0); *dst_hi = lp_build_interleave2(builder, src_type, src, msb, 1); - } - else { +#else *dst_lo = lp_build_interleave2(builder, src_type, msb, src, 0); *dst_hi = lp_build_interleave2(builder, src_type, msb, src, 1); - } +#endif /* Cast the result into the new type (twice as wide) */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 
307506507d..02d43e373a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -1840,7 +1840,11 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, unsigned i, j; for(j = 0; j < h16.type.length; j += 4) { - unsigned subindex = util_cpu_caps.little_endian ? 0 : 1; +#ifdef PIPE_ARCH_LITTLE_ENDIAN + unsigned subindex = 0; +#else + unsigned subindex = 1; +#endif LLVMValueRef index; index = LLVMConstInt(elem_type, j/2 + subindex, 0); diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c index b1a8c75b99..2bbc554a90 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -391,23 +391,6 @@ util_cpu_detect(void) memset(&util_cpu_caps, 0, sizeof util_cpu_caps); - /* Check for arch type */ -#if defined(PIPE_ARCH_MIPS) - util_cpu_caps.arch = UTIL_CPU_ARCH_MIPS; -#elif defined(PIPE_ARCH_ALPHA) - util_cpu_caps.arch = UTIL_CPU_ARCH_ALPHA; -#elif defined(PIPE_ARCH_SPARC) - util_cpu_caps.arch = UTIL_CPU_ARCH_SPARC; -#elif defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - util_cpu_caps.arch = UTIL_CPU_ARCH_X86; - util_cpu_caps.little_endian = 1; -#elif defined(PIPE_ARCH_PPC) - util_cpu_caps.arch = UTIL_CPU_ARCH_POWERPC; - util_cpu_caps.little_endian = 0; -#else - util_cpu_caps.arch = UTIL_CPU_ARCH_UNKNOWN; -#endif - /* Count the number of CPUs in system */ #if defined(PIPE_OS_WINDOWS) { @@ -504,7 +487,6 @@ util_cpu_detect(void) #ifdef DEBUG if (debug_get_option_dump_cpu()) { - debug_printf("util_cpu_caps.arch = %i\n", util_cpu_caps.arch); debug_printf("util_cpu_caps.nr_cpus = %u\n", util_cpu_caps.nr_cpus); debug_printf("util_cpu_caps.x86_cpu_type = %u\n", util_cpu_caps.x86_cpu_type); diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h b/src/gallium/auxiliary/util/u_cpu_detect.h index 4b3dc39c34..f3bef0993c 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.h +++ 
b/src/gallium/auxiliary/util/u_cpu_detect.h @@ -36,26 +36,15 @@ #define _UTIL_CPU_DETECT_H #include "pipe/p_compiler.h" - -enum util_cpu_arch { - UTIL_CPU_ARCH_UNKNOWN = 0, - UTIL_CPU_ARCH_MIPS, - UTIL_CPU_ARCH_ALPHA, - UTIL_CPU_ARCH_SPARC, - UTIL_CPU_ARCH_X86, - UTIL_CPU_ARCH_POWERPC -}; +#include "pipe/p_config.h" struct util_cpu_caps { - enum util_cpu_arch arch; unsigned nr_cpus; /* Feature flags */ int x86_cpu_type; unsigned cacheline; - unsigned little_endian:1; - unsigned has_tsc:1; unsigned has_mmx:1; unsigned has_mmx2:1; -- cgit v1.2.3 From e01a49af61a4d56800b1ad672959ba7a88c1da1e Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Sat, 14 Aug 2010 11:44:01 -0700 Subject: r300g: Remove unnecessary header. --- src/gallium/winsys/radeon/drm/radeon_drm.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/winsys/radeon/drm/radeon_drm.c b/src/gallium/winsys/radeon/drm/radeon_drm.c index 593741b86c..86d4f94969 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm.c @@ -39,7 +39,6 @@ #include "util/u_memory.h" #include "xf86drm.h" -#include static struct radeon_libdrm_winsys * radeon_winsys_create(int fd) -- cgit v1.2.3 From 77458b565a531721904b7f3e31b71505e763b97a Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Sat, 14 Aug 2010 11:56:26 -0700 Subject: r600g: Remove unnecessary headers. 
--- src/gallium/winsys/r600/drm/r600_drm.c | 1 - src/gallium/winsys/r600/drm/radeon.c | 1 - 2 files changed, 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index 6ebbae0b3f..c76e7f5fa5 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -31,7 +31,6 @@ #include "radeon_priv.h" #include "r600_screen.h" #include "r600_resource.h" -#include "r600_public.h" #include "r600_drm_public.h" #include "state_tracker/drm_driver.h" diff --git a/src/gallium/winsys/r600/drm/radeon.c b/src/gallium/winsys/r600/drm/radeon.c index 7e65669806..80b0a1d397 100644 --- a/src/gallium/winsys/r600/drm/radeon.c +++ b/src/gallium/winsys/r600/drm/radeon.c @@ -23,7 +23,6 @@ #include "xf86drm.h" #include "radeon_priv.h" #include "radeon_drm.h" -#include "r600d.h" enum radeon_family radeon_get_family(struct radeon *radeon) { -- cgit v1.2.3 From db0e24e34a4680c70038715f23847b9b2f7f75d3 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Sat, 14 Aug 2010 12:18:51 -0700 Subject: scons: Fix r600g build. 
--- src/gallium/targets/dri-r600/SConscript | 2 +- src/gallium/winsys/r600/drm/SConscript | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 src/gallium/winsys/r600/drm/SConscript (limited to 'src/gallium') diff --git a/src/gallium/targets/dri-r600/SConscript b/src/gallium/targets/dri-r600/SConscript index 97c5df01fe..64d6d2a7f6 100644 --- a/src/gallium/targets/dri-r600/SConscript +++ b/src/gallium/targets/dri-r600/SConscript @@ -12,7 +12,7 @@ env.Append(CPPDEFINES = ['GALLIUM_RBUG', 'GALLIUM_TRACE']) env.Prepend(LIBS = [ st_dri, - r600drm, + r600winsys, r600, trace, rbug, diff --git a/src/gallium/winsys/r600/drm/SConscript b/src/gallium/winsys/r600/drm/SConscript new file mode 100644 index 0000000000..2f20d9f895 --- /dev/null +++ b/src/gallium/winsys/r600/drm/SConscript @@ -0,0 +1,25 @@ +Import('*') + +env = env.Clone() + +r600_sources = [ + 'bof.c', + 'r600_state.c', + 'radeon_ctx.c', + 'radeon_draw.c', + 'radeon_state.c', + 'radeon_bo.c', + 'radeon_pciid.c', + 'radeon.c', + 'r600_drm.c' +] + +env.ParseConfig('pkg-config --cflags libdrm_radeon') +env.Append(CPPPATH = '#/src/gallium/drivers/r600') + +r600winsys = env.ConvenienceLibrary( + target ='r600winsys', + source = r600_sources, +) + +Export('r600winsys') -- cgit v1.2.3 From 10d77f3f6b86eeb3ec1d9736c02335831e5c73c2 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Sat, 14 Aug 2010 12:54:21 -0700 Subject: gallivm: Remove unnecessary header. 
--- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 02d43e373a..806c7d56a8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -40,7 +40,6 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_format.h" -#include "util/u_cpu_detect.h" #include "lp_bld_debug.h" #include "lp_bld_type.h" #include "lp_bld_const.h" -- cgit v1.2.3 From 7f5202be63c6dc639e57d11ef8253e79dd349f59 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Sun, 15 Aug 2010 01:00:42 +0100 Subject: gallium: Make printing info on debug builds default off This commit silences the printing of most of the debug information when running debug builds. The big culprits are: the tgsi sanity checker that gets run on all shaders on debug; all the options; and finally the cpu caps printer.
--- src/gallium/auxiliary/tgsi/tgsi_sanity.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_sanity.h | 2 +- src/gallium/auxiliary/util/u_cpu_detect.c | 2 +- src/gallium/auxiliary/util/u_debug.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 287ee006cf..acbff103ef 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -34,7 +34,7 @@ #include "tgsi_iterate.h" -DEBUG_GET_ONCE_BOOL_OPTION(print_sanity, "TGSI_PRINT_SANITY", TRUE) +DEBUG_GET_ONCE_BOOL_OPTION(print_sanity, "TGSI_PRINT_SANITY", FALSE) typedef struct { diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.h b/src/gallium/auxiliary/tgsi/tgsi_sanity.h index 46d8d18419..73f0f414e3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.h +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.h @@ -36,7 +36,7 @@ extern "C" { /* Check the given token stream for errors and common mistakes. * Diagnostic messages are printed out to the debug output, and is - * controlled by the debug option TGSI_PRINT_SANITY (default true). + * controlled by the debug option TGSI_PRINT_SANITY (default false). * Returns TRUE if there are no errors, even though there could be some warnings. 
*/ boolean diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c index 2bbc554a90..5056351307 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -73,7 +73,7 @@ #endif -DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", TRUE) +DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", FALSE) struct util_cpu_caps util_cpu_caps; diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index ad162558bc..504e6d2a18 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -88,7 +88,7 @@ debug_get_option_should_print(void) * but its cool since we set first to false */ first = FALSE; - value = debug_get_bool_option("GALLIUM_PRINT_OPTIONS", TRUE); + value = debug_get_bool_option("GALLIUM_PRINT_OPTIONS", FALSE); /* XXX should we print this option? Currently it wont */ return value; } -- cgit v1.2.3 From d62b29f567f7dd41d7abf3c931065ea54e90f48b Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Sun, 15 Aug 2010 00:59:57 +0100 Subject: gallium/docs: Add a debugging section --- src/gallium/docs/source/debugging.rst | 96 +++++++++++++++++++++++++++++++++++ src/gallium/docs/source/index.rst | 1 + 2 files changed, 97 insertions(+) create mode 100644 src/gallium/docs/source/debugging.rst (limited to 'src/gallium') diff --git a/src/gallium/docs/source/debugging.rst b/src/gallium/docs/source/debugging.rst new file mode 100644 index 0000000000..74f51bc0ea --- /dev/null +++ b/src/gallium/docs/source/debugging.rst @@ -0,0 +1,96 @@ +Debugging +========= + +Debugging utilities in gallium. + +Debug Variables +^^^^^^^^^^^^^^^ + +All drivers respond to a couple of debug environment variables. Below is +a collection of them. Set them as you would any normal environment variable +for the platform/operating system you are running.
For Linux this can be +done by typing "export var=value" into a console and then running the +program from that console. + +Common +"""""" + +GALLIUM_PRINT_OPTIONS (false) + +This option controls if the debug variables should be printed to stderr. +This is probably the most useful variable since it allows you to find +which variables a driver responds to. + +GALLIUM_RBUG (false) + +Controls if the :ref:`rbug` should be used. + +GALLIUM_TRACE ("") + +If not set tracing is not used, if set it will write the output to the file +specified by the variable. So setting it to "trace.xml" will write the output +to the file "trace.xml". + +GALLIUM_DUMP_CPU (false) + +Dump information about the current cpu that the driver is running on. + +TGSI_PRINT_SANITY (false) + +Gallium has an inbuilt shader sanity checker, this option controls if results +from it should be printed. This includes warnings such as unused variables. + +DRAW_USE_LLVM (false) + +Should the :ref:`draw` module use llvm for vertex and geometry shaders. + +ST_DEBUG (0x0) + +Debug :ref:`flags` for the GL state tracker. + + +Driver specific +""""""""""""""" + +I915_DEBUG (0x0) + +Debug :ref:`flags` for the i915 driver. + +I915_NO_HW (false) + +Stop the i915 driver from submitting commands to the hardware. + +I915_DUMP_CMD (false) + +Dump all commands going to the hardware. + +LP_DEBUG (0x0) + +Debug :ref:`flags` for the llvmpipe driver. + +LP_NUM_THREADS (num cpus) + +Number of threads that the llvmpipe driver should use. + + +.. _flags: + +Flags +""""" + +The variables of type flags all take a string with comma-separated +flags to enable different debugging for different parts of the drivers +or state tracker. If set to "help" the driver will print a list of flags +to which the variable can be set. Order does not matter. + + +..
_rbug: + +Remote Debugger +^^^^^^^^^^^^^^^ + +Or rbug for short, allows for runtime inspections of :ref:`Context`, +:ref:`Screen`, Resources and Shaders; pausing and stepping of draw calls; +and runtime disable and replacement of shaders. It is used with rbug-gui which +is hosted outside of the main mesa repository. Rbug can be used over a +network connection so the debugger does not need to be on the same machine. diff --git a/src/gallium/docs/source/index.rst b/src/gallium/docs/source/index.rst index 54bc883fce..6c19842dac 100644 --- a/src/gallium/docs/source/index.rst +++ b/src/gallium/docs/source/index.rst @@ -12,6 +12,7 @@ Contents: :maxdepth: 2 intro + debugging tgsi screen context -- cgit v1.2.3 From fe19935a127916ab2f1957c48ad1da652935928f Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Sun, 15 Aug 2010 00:57:18 +0100 Subject: gallium/docs: Add rbug to distribution --- src/gallium/docs/source/distro.rst | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium') diff --git a/src/gallium/docs/source/distro.rst b/src/gallium/docs/source/distro.rst index e379ad3271..70d75b51e6 100644 --- a/src/gallium/docs/source/distro.rst +++ b/src/gallium/docs/source/distro.rst @@ -74,6 +74,11 @@ Trace Wrapper driver. Trace dumps an XML record of the calls made to the :ref:`Context` and :ref:`Screen` objects that it wraps. +Rbug +^^^^ + +Wrapper driver. :ref:`rbug` driver used with standalone rbug-gui.
+ State Trackers -------------- -- cgit v1.2.3 From 0d699e8ee930c6c7e0f8abff14bf37e7f67807fe Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 14 Aug 2010 22:35:23 +0200 Subject: r300g: separate num_cs_end_dwords out from prepare_for_rendering --- src/gallium/drivers/r300/r300_emit.c | 11 ++++++++++ src/gallium/drivers/r300/r300_emit.h | 1 + src/gallium/drivers/r300/r300_render.c | 37 ++++++++++++---------------------- 3 files changed, 25 insertions(+), 24 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 98958d1a2e..ccf360b6ea 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -1209,6 +1209,17 @@ unsigned r300_get_num_dirty_dwords(struct r300_context *r300) return dwords; } +unsigned r300_get_num_cs_end_dwords(struct r300_context *r300) +{ + unsigned dwords = 0; + + /* Emitted in flush. */ + dwords += 26; /* emit_query_end */ + dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */ + + return dwords; +} + /* Emit all dirty state. */ void r300_emit_dirty_state(struct r300_context* r300) { diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 2f2c2f2dcb..bae2525634 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -116,6 +116,7 @@ void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state); void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state); unsigned r300_get_num_dirty_dwords(struct r300_context *r300); +unsigned r300_get_num_cs_end_dwords(struct r300_context *r300); /* Emit all dirty state. 
*/ void r300_emit_dirty_state(struct r300_context* r300); diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index f2ff65b261..86b11ca045 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -186,20 +186,14 @@ enum r300_prepare_flags { * \param cs_dwords The number of dwords to reserve in CS. * \param aos_offset The offset passed to emit_aos. * \param index_bias The index bias to emit. - * \param end_cs_dwords The number of free dwords which must be available - * at the end of CS after drawing in case the CS space - * management is performed by a draw_* function manually. - * The parameter may be NULL. */ static void r300_prepare_for_rendering(struct r300_context *r300, enum r300_prepare_flags flags, struct pipe_resource *index_buffer, unsigned cs_dwords, int aos_offset, - int index_bias, - unsigned *end_cs_dwords) + int index_bias) { - unsigned end_dwords = 0; boolean flushed = FALSE; boolean first_draw = flags & PREP_FIRST_DRAW; boolean emit_aos = flags & PREP_EMIT_AOS; @@ -221,11 +215,7 @@ static void r300_prepare_for_rendering(struct r300_context *r300, cs_dwords += 7; /* emit_aos_swtcl */ } - /* Emitted in flush. */ - end_dwords += 26; /* emit_query_end */ - end_dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */ - - cs_dwords += end_dwords; + cs_dwords += r300_get_num_cs_end_dwords(r300); /* Reserve requested CS space. 
*/ if (cs_dwords > (r300->cs->ndw - r300->cs->cdw)) { @@ -250,9 +240,6 @@ static void r300_prepare_for_rendering(struct r300_context *r300, if (emit_aos_swtcl) r300_emit_aos_swtcl(r300, indexed); } - - if (end_cs_dwords) - *end_cs_dwords = end_dwords; } static boolean immd_is_good_idea(struct r300_context *r300, @@ -353,7 +340,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, dwords = 9 + count * vertex_size; - r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL); + r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0); BEGIN_CS(dwords); OUT_CS_REG(R300_GA_COLOR_CONTROL, @@ -533,7 +520,7 @@ static void r300_draw_range_elements(struct pipe_context* pipe, /* 15 dwords for emit_draw_elements */ r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED, - indexBuffer, 15, buffer_offset, indexBias, NULL); + indexBuffer, 15, buffer_offset, indexBias); if (alt_num_verts || count <= 65535) { r300_emit_draw_elements(r300, indexBuffer, indexSize, @@ -552,7 +539,7 @@ static void r300_draw_range_elements(struct pipe_context* pipe, if (count) { r300_prepare_for_rendering(r300, PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED, - indexBuffer, 15, buffer_offset, indexBias, NULL); + indexBuffer, 15, buffer_offset, indexBias); } } while (count); } @@ -597,7 +584,7 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, } else { /* 9 spare dwords for emit_draw_arrays. 
*/ r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS, - NULL, 9, start, 0, NULL); + NULL, 9, start, 0); if (alt_num_verts || count <= 65535) { r300_emit_draw_arrays(r300, mode, count); @@ -613,7 +600,7 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, if (count) { r300_prepare_for_rendering(r300, PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9, - start, 0, NULL); + start, 0); } } while (count); } @@ -854,7 +841,7 @@ static void r300_render_draw_arrays(struct vbuf_render* render, (void) i; (void) ptr; r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL, - NULL, dwords, 0, 0, NULL); + NULL, dwords, 0, 0); DBG(r300, DBG_DRAW, "r300: render_draw_arrays (count: %d)\n", count); @@ -907,7 +894,8 @@ static void r300_render_draw_elements(struct vbuf_render* render, * indices than it can fit in CS. */ r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, - NULL, 256, 0, 0, &end_cs_dwords); + NULL, 256, 0, 0); + end_cs_dwords = r300_get_num_cs_end_dwords(r300); while (count) { free_dwords = r300->cs->ndw - r300->cs->cdw; @@ -937,7 +925,8 @@ static void r300_render_draw_elements(struct vbuf_render* render, if (count) { r300_prepare_for_rendering(r300, PREP_EMIT_AOS_SWTCL | PREP_INDEXED, - NULL, 256, 0, 0, &end_cs_dwords); + NULL, 256, 0, 0); + end_cs_dwords = r300_get_num_cs_end_dwords(r300); } } } @@ -1031,7 +1020,7 @@ static void r300_blitter_draw_rectangle(struct blitter_context *blitter, r300->clip_state.dirty = FALSE; r300->viewport_state.dirty = FALSE; - r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL); + r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0); DBG(r300, DBG_DRAW, "r300: draw_rectangle\n"); -- cgit v1.2.3 From 5f8ccf1e2766e3e7537dd8a838837c5e4af4246c Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 14 Aug 2010 18:37:04 +0200 Subject: r300g: do not use fastfill if ZMask RAM is not properly initialized 
z_fastfill -> dirty_zmask[level]. --- src/gallium/drivers/r300/r300_blit.c | 16 ++++++++++++---- src/gallium/drivers/r300/r300_context.h | 2 -- src/gallium/drivers/r300/r300_hyperz.c | 27 ++++++++++++++++++--------- src/gallium/drivers/r300/r300_state.c | 4 +--- 4 files changed, 31 insertions(+), 18 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 5fe9b9017d..d44f53ec6f 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -99,9 +99,6 @@ static boolean r300_cbzb_clear_allowed(struct r300_context *r300, struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - if (r300->z_fastfill) - clear_buffers &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL); - /* Only color clear allowed, and only one colorbuffer. */ if (clear_buffers != PIPE_CLEAR_COLOR || fb->nr_cbufs != 1) return FALSE; @@ -186,8 +183,10 @@ static void r300_clear(struct pipe_context* pipe, r300_depth_clear_value(fb->zsbuf->format, depth, stencil); r300_mark_fb_state_dirty(r300, R300_CHANGED_ZCLEAR_FLAG); - if (r300->z_compression || r300->z_fastfill) + if (r300_texture(fb->zsbuf->texture)->zmask_mem[fb->zsbuf->level]) { r300->zmask_clear.dirty = TRUE; + buffers &= ~PIPE_CLEAR_DEPTHSTENCIL; + } if (r300->hiz_enable) r300->hiz_clear.dirty = TRUE; } @@ -222,6 +221,15 @@ static void r300_clear(struct pipe_context* pipe, r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); } + /* Enable fastfill. + * + * If we cleared the zmask, it's dirty now. The Hyper-Z state update + * looks for a dirty zmask and enables fastfill accordingly. 
*/ + if (fb->zsbuf && + r300_texture(fb->zsbuf->texture)->dirty_zmask[fb->zsbuf->level]) { + r300->hyperz_state.dirty = TRUE; + } + /* XXX this flush "fixes" a hardlock in the cubestorm xscreensaver */ if (r300->flush_counter == 0) pipe->flush(pipe, 0, NULL); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 8b772f3887..98e6d358e9 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -565,8 +565,6 @@ struct r300_context { boolean two_sided_color; /* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */ boolean incompatible_vb_layout; - /* Whether fast zclear is enabled. */ - boolean z_fastfill; #define R300_Z_COMPRESS_44 1 #define RV350_Z_COMPRESS_88 2 int z_compression; diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index 3b0adc3584..1f6244c49e 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -129,6 +129,9 @@ static void r300_update_hyperz(struct r300_context* r300) { struct r300_hyperz_state *z = (struct r300_hyperz_state*)r300->hyperz_state.state; + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + boolean dirty_zmask = FALSE; z->gb_z_peq_config = 0; z->zb_bw_cntl = 0; @@ -140,23 +143,29 @@ static void r300_update_hyperz(struct r300_context* r300) return; } + if (!fb->zsbuf) + return; + if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) return; + dirty_zmask = r300_texture(fb->zsbuf->texture)->dirty_zmask[fb->zsbuf->level]; + + /* Z fastfill. */ + if (dirty_zmask) { + z->zb_bw_cntl |= R300_FAST_FILL_ENABLE; /* | R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE;*/ + } + /* Zbuffer compression. */ - if (r300->z_compression) { + if (dirty_zmask && r300->z_compression) { z->zb_bw_cntl |= R300_RD_COMP_ENABLE; if (r300->z_decomp_rd == false) z->zb_bw_cntl |= R300_WR_COMP_ENABLE; - /* RV350 and up optimizations. 
*/ - if (r300->z_compression == RV350_Z_COMPRESS_88) - z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; - } - - /* Z fastfill. */ - if (r300->z_fastfill) { - z->zb_bw_cntl |= R300_FAST_FILL_ENABLE; /* | R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE;*/ } + /* RV350 and up optimizations. */ + /* The section 10.4.9 in the docs is a lie. */ + if (r300->z_compression == RV350_Z_COMPRESS_88) + z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; if (r300->hiz_enable) { bool can_hiz = r300_can_hiz(r300); diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 8abc65a2b8..9afaa5f651 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -753,7 +753,6 @@ static void r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE); r300->hiz_enable = false; - r300->z_fastfill = false; r300->z_compression = false; if (state->zsbuf) { @@ -785,7 +784,6 @@ static void } if (tex->zmask_mem[level]) { - r300->z_fastfill = 1; /* compression causes hangs on 16-bit */ if (zbuffer_bpp == 24) r300->z_compression = compress; @@ -793,7 +791,7 @@ static void DBG(r300, DBG_HYPERZ, "hyper-z features: hiz: %d @ %08x z-compression: %d z-fastfill: %d @ %08x\n", r300->hiz_enable, tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef, - r300->z_compression, r300->z_fastfill, + r300->z_compression, tex->zmask_mem[level] ? 1 : 0, tex->zmask_mem[level] ? tex->zmask_mem[level]->ofs : 0xdeadbeef); } -- cgit v1.2.3 From 516152112ea2a8524865d230f657977583246092 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 13 Aug 2010 04:57:30 +0200 Subject: r300g: do not clear with blitter if we clear just the ZMask RAM This skips the blitter clear path entirely if the color is not cleared and the depth+stencil is cleared with the ZMask. 
--- src/gallium/drivers/r300/r300_blit.c | 46 +++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 9 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index d44f53ec6f..00756c771c 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -21,6 +21,8 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r300_context.h" +#include "r300_emit.h" +#include "r300_hyperz.h" #include "r300_texture.h" #include "r300_winsys.h" @@ -177,8 +179,7 @@ static void r300_clear(struct pipe_context* pipe, /* Enable fast Z clear. * The zbuffer must be in micro-tiled mode, otherwise it locks up. */ - if ((buffers & (PIPE_CLEAR_DEPTH|PIPE_CLEAR_STENCIL)) && has_hyperz) { - + if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && has_hyperz) { hyperz_dcv = hyperz->zb_depthclearvalue = r300_depth_clear_value(fb->zsbuf->format, depth, stencil); @@ -206,13 +207,40 @@ static void r300_clear(struct pipe_context* pipe, } /* Clear. */ - r300_blitter_begin(r300, R300_CLEAR); - util_blitter_clear(r300->blitter, - width, - height, - fb->nr_cbufs, - buffers, rgba, depth, stencil); - r300_blitter_end(r300); + if (buffers) { + /* Clear using the blitter. */ + r300_blitter_begin(r300, R300_CLEAR); + util_blitter_clear(r300->blitter, + width, + height, + fb->nr_cbufs, + buffers, rgba, depth, stencil); + r300_blitter_end(r300); + } else if (r300->zmask_clear.dirty) { + /* Just clear zmask and hiz now, this does not use a standard draw + * procedure. */ + unsigned dwords; + + /* Calculate zmask_clear and hiz_clear atom sizes. */ + r300_update_hyperz_state(r300); + dwords = r300->zmask_clear.size + + (r300->hiz_clear.dirty ? r300->hiz_clear.size : 0) + + r300_get_num_cs_end_dwords(r300); + + /* Reserve CS space. */ + if (dwords > (r300->cs->ndw - r300->cs->cdw)) { + r300->context.flush(&r300->context, 0, NULL); + } + + /* Emit clear packets. 
*/ + r300_emit_zmask_clear(r300, r300->zmask_clear.size, + r300->zmask_clear.state); + if (r300->hiz_clear.dirty) + r300_emit_hiz_clear(r300, r300->hiz_clear.size, + r300->hiz_clear.state); + } else { + assert(0); + } /* Disable CBZB clear. */ if (r300->cbzb_clear) { -- cgit v1.2.3 From 59c2230879cb5149ce99ac8565ce6af9c5b02e04 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 15 Aug 2010 02:18:15 +0200 Subject: r300g: rename dirty_zmask -> zmask_in_use --- src/gallium/drivers/r300/r300_blit.c | 10 +++++----- src/gallium/drivers/r300/r300_context.h | 2 +- src/gallium/drivers/r300/r300_emit.c | 4 ++-- src/gallium/drivers/r300/r300_hyperz.c | 8 ++++---- src/gallium/drivers/r300/r300_state_derived.c | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 00756c771c..29393109fa 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -251,10 +251,10 @@ static void r300_clear(struct pipe_context* pipe, /* Enable fastfill. * - * If we cleared the zmask, it's dirty now. The Hyper-Z state update - * looks for a dirty zmask and enables fastfill accordingly. */ + * If we cleared the zmask, it's in use now. The Hyper-Z state update + * looks if zmask is in use and enables fastfill accordingly. 
*/ if (fb->zsbuf && - r300_texture(fb->zsbuf->texture)->dirty_zmask[fb->zsbuf->level]) { + r300_texture(fb->zsbuf->texture)->zmask_in_use[fb->zsbuf->level]) { r300->hyperz_state.dirty = TRUE; } @@ -307,7 +307,7 @@ void r300_flush_depth_stencil(struct pipe_context *pipe, if (!tex->zmask_mem[subdst.level]) return; - if (!tex->dirty_zmask[subdst.level]) + if (!tex->zmask_in_use[subdst.level]) return; dstsurf = pipe->screen->get_tex_surface(pipe->screen, dst, @@ -319,7 +319,7 @@ void r300_flush_depth_stencil(struct pipe_context *pipe, r300_blitter_end(r300); r300->z_decomp_rd = FALSE; - tex->dirty_zmask[subdst.level] = FALSE; + tex->zmask_in_use[subdst.level] = FALSE; } /* Copy a block of pixels from one surface to another using HW. */ diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 98e6d358e9..6c4c396982 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -397,7 +397,7 @@ struct r300_texture { /* hyper-z memory allocs */ struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS]; struct mem_block *zmask_mem[R300_MAX_TEXTURE_LEVELS]; - boolean dirty_zmask[R300_MAX_TEXTURE_LEVELS]; + boolean zmask_in_use[R300_MAX_TEXTURE_LEVELS]; /* This is the level tiling flags were last time set for. * It's used to prevent redundant tiling-flags changes from happening.*/ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index ccf360b6ea..fe334c924e 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -1086,8 +1086,8 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state r300_emit_zmask_line_clear(r300, offset, stride, 0x0);//0xffffffff); } - /* Mark the current zbuffer's zmask as dirty. */ - tex->dirty_zmask[fb->zsbuf->level] = TRUE; + /* Mark the current zbuffer's zmask as in use. 
*/ + tex->zmask_in_use[fb->zsbuf->level] = TRUE; } void r300_emit_ztop_state(struct r300_context* r300, diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index 1f6244c49e..9cce195eb4 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -131,7 +131,7 @@ static void r300_update_hyperz(struct r300_context* r300) (struct r300_hyperz_state*)r300->hyperz_state.state; struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - boolean dirty_zmask = FALSE; + boolean zmask_in_use = FALSE; z->gb_z_peq_config = 0; z->zb_bw_cntl = 0; @@ -149,15 +149,15 @@ static void r300_update_hyperz(struct r300_context* r300) if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) return; - dirty_zmask = r300_texture(fb->zsbuf->texture)->dirty_zmask[fb->zsbuf->level]; + zmask_in_use = r300_texture(fb->zsbuf->texture)->zmask_in_use[fb->zsbuf->level]; /* Z fastfill. */ - if (dirty_zmask) { + if (zmask_in_use) { z->zb_bw_cntl |= R300_FAST_FILL_ENABLE; /* | R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE;*/ } /* Zbuffer compression. */ - if (dirty_zmask && r300->z_compression) { + if (zmask_in_use && r300->z_compression) { z->zb_bw_cntl |= R300_RD_COMP_ENABLE; if (r300->z_decomp_rd == false) z->zb_bw_cntl |= R300_WR_COMP_ENABLE; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index c047a127ba..4a63ed7fc1 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -738,7 +738,7 @@ static void r300_flush_depth_textures(struct r300_context *r300) continue; for (level = 0; level <= tex->last_level; level++) - if (r300_texture(tex)->dirty_zmask[level]) { + if (r300_texture(tex)->zmask_in_use[level]) { /* We don't handle 3D textures and cubemaps yet. 
*/ r300_flush_depth_stencil(&r300->context, tex, u_subresource(0, level), 0); -- cgit v1.2.3 From d5a86f9fc9fc96a0d771c0409b557636ef89f350 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 15 Aug 2010 03:14:25 +0200 Subject: r300g: do not use HiZ if HiZ RAM is not properly initialized --- src/gallium/drivers/r300/r300_blit.c | 17 ++++++++++------- src/gallium/drivers/r300/r300_context.h | 2 +- src/gallium/drivers/r300/r300_emit.c | 3 +++ src/gallium/drivers/r300/r300_hyperz.c | 12 ++++++++---- src/gallium/drivers/r300/r300_state.c | 7 +------ 5 files changed, 23 insertions(+), 18 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 29393109fa..559936cec6 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -172,6 +172,8 @@ static void r300_clear(struct pipe_context* pipe, (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_hyperz_state *hyperz = (struct r300_hyperz_state*)r300->hyperz_state.state; + struct r300_texture *zstex = + fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL; uint32_t width = fb->width; uint32_t height = fb->height; boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); @@ -184,11 +186,11 @@ static void r300_clear(struct pipe_context* pipe, r300_depth_clear_value(fb->zsbuf->format, depth, stencil); r300_mark_fb_state_dirty(r300, R300_CHANGED_ZCLEAR_FLAG); - if (r300_texture(fb->zsbuf->texture)->zmask_mem[fb->zsbuf->level]) { + if (zstex->zmask_mem[fb->zsbuf->level]) { r300->zmask_clear.dirty = TRUE; buffers &= ~PIPE_CLEAR_DEPTHSTENCIL; } - if (r300->hiz_enable) + if (zstex->hiz_mem[fb->zsbuf->level]) r300->hiz_clear.dirty = TRUE; } @@ -249,12 +251,13 @@ static void r300_clear(struct pipe_context* pipe, r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); } - /* Enable fastfill. + /* Enable fastfill and/or hiz. * - * If we cleared the zmask, it's in use now. 
The Hyper-Z state update - * looks if zmask is in use and enables fastfill accordingly. */ - if (fb->zsbuf && - r300_texture(fb->zsbuf->texture)->zmask_in_use[fb->zsbuf->level]) { + * If we cleared zmask/hiz, it's in use now. The Hyper-Z state update + * looks if zmask/hiz is in use and enables fastfill accordingly. */ + if (zstex && + (zstex->zmask_in_use[fb->zsbuf->level] || + zstex->hiz_in_use[fb->zsbuf->level])) { r300->hyperz_state.dirty = TRUE; } diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 6c4c396982..6fa7f470f9 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -398,6 +398,7 @@ struct r300_texture { struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS]; struct mem_block *zmask_mem[R300_MAX_TEXTURE_LEVELS]; boolean zmask_in_use[R300_MAX_TEXTURE_LEVELS]; + boolean hiz_in_use[R300_MAX_TEXTURE_LEVELS]; /* This is the level tiling flags were last time set for. * It's used to prevent redundant tiling-flags changes from happening.*/ @@ -568,7 +569,6 @@ struct r300_context { #define R300_Z_COMPRESS_44 1 #define RV350_Z_COMPRESS_88 2 int z_compression; - boolean hiz_enable; boolean cbzb_clear; boolean z_decomp_rd; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index fe334c924e..d0fd45349e 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -1046,6 +1046,9 @@ void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) r300_emit_hiz_line_clear(r300, offset, stride, 0xffffffff); } z->current_func = -1; + + /* Mark the current zbuffer's hiz ram as in use. 
*/ + tex->hiz_in_use[fb->zsbuf->level] = TRUE; } void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state) diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index 9cce195eb4..811b5646e1 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -131,7 +131,10 @@ static void r300_update_hyperz(struct r300_context* r300) (struct r300_hyperz_state*)r300->hyperz_state.state; struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_texture *zstex = + fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL; boolean zmask_in_use = FALSE; + boolean hiz_in_use = FALSE; z->gb_z_peq_config = 0; z->zb_bw_cntl = 0; @@ -143,13 +146,14 @@ static void r300_update_hyperz(struct r300_context* r300) return; } - if (!fb->zsbuf) + if (!zstex) return; if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) return; - zmask_in_use = r300_texture(fb->zsbuf->texture)->zmask_in_use[fb->zsbuf->level]; + zmask_in_use = zstex->zmask_in_use[fb->zsbuf->level]; + hiz_in_use = zstex->hiz_in_use[fb->zsbuf->level]; /* Z fastfill. */ if (zmask_in_use) { @@ -167,7 +171,7 @@ static void r300_update_hyperz(struct r300_context* r300) if (r300->z_compression == RV350_Z_COMPRESS_88) z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; - if (r300->hiz_enable) { + if (hiz_in_use) { bool can_hiz = r300_can_hiz(r300); if (can_hiz) { z->zb_bw_cntl |= R300_HIZ_ENABLE; @@ -177,8 +181,8 @@ static void r300_update_hyperz(struct r300_context* r300) } } + /* R500-specific features and optimizations. */ if (r300->screen->caps.is_r500) { - /* XXX Are these bits really available on RV350? 
*/ z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3; z->zb_bw_cntl |= R500_HIZ_EQUAL_REJECT_ENABLE | diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 9afaa5f651..239edd98e3 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -752,7 +752,6 @@ static void r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE); - r300->hiz_enable = false; r300->z_compression = false; if (state->zsbuf) { @@ -779,17 +778,13 @@ static void /* work out whether we can support zmask features on this buffer */ r300_zmask_alloc_block(r300, zs_surf, compress); - if (tex->hiz_mem[level]) { - r300->hiz_enable = 1; - } - if (tex->zmask_mem[level]) { /* compression causes hangs on 16-bit */ if (zbuffer_bpp == 24) r300->z_compression = compress; } DBG(r300, DBG_HYPERZ, - "hyper-z features: hiz: %d @ %08x z-compression: %d z-fastfill: %d @ %08x\n", r300->hiz_enable, + "hyper-z features: hiz: %d @ %08x z-compression: %d z-fastfill: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0, tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef, r300->z_compression, tex->zmask_mem[level] ? 1 : 0, tex->zmask_mem[level] ? tex->zmask_mem[level]->ofs : 0xdeadbeef); -- cgit v1.2.3 From 6dfcff6b05f44ad4e338fac18ccd4d470c953b1e Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 15 Aug 2010 09:13:50 +0200 Subject: r300g: mark HiZ/ZMask_clear atoms as non-dirty after emission in clear --- src/gallium/drivers/r300/r300_blit.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 559936cec6..47ffc0cb3c 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -237,9 +237,12 @@ static void r300_clear(struct pipe_context* pipe, /* Emit clear packets. 
*/ r300_emit_zmask_clear(r300, r300->zmask_clear.size, r300->zmask_clear.state); - if (r300->hiz_clear.dirty) + r300->zmask_clear.dirty = FALSE; + if (r300->hiz_clear.dirty) { r300_emit_hiz_clear(r300, r300->hiz_clear.size, r300->hiz_clear.state); + r300->hiz_clear.dirty = FALSE; + } } else { assert(0); } -- cgit v1.2.3 From bf357aedffd659e43ef9ceefa875c08991a5f46d Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 15 Aug 2010 03:05:18 -0700 Subject: gallium/docs: Add formatting for envvar role; change debugging. Per Jakob's request. Not super-pretty, but it's a good point for modding later. --- src/gallium/docs/source/conf.py | 2 +- src/gallium/docs/source/debugging.rst | 24 +++++++++++------------ src/gallium/docs/source/exts/formatting.py | 31 ++++++++++++++++++++++++++++++ src/gallium/docs/source/exts/tgsi.py | 17 ---------------- 4 files changed, 44 insertions(+), 30 deletions(-) create mode 100644 src/gallium/docs/source/exts/formatting.py delete mode 100644 src/gallium/docs/source/exts/tgsi.py (limited to 'src/gallium') diff --git a/src/gallium/docs/source/conf.py b/src/gallium/docs/source/conf.py index 99e665234e..0846e7d0ec 100644 --- a/src/gallium/docs/source/conf.py +++ b/src/gallium/docs/source/conf.py @@ -22,7 +22,7 @@ sys.path.append(os.path.abspath('exts')) # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.pngmath', 'tgsi'] +extensions = ['sphinx.ext.pngmath', 'formatting'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] diff --git a/src/gallium/docs/source/debugging.rst b/src/gallium/docs/source/debugging.rst index 74f51bc0ea..f29bf53dfe 100644 --- a/src/gallium/docs/source/debugging.rst +++ b/src/gallium/docs/source/debugging.rst @@ -15,36 +15,36 @@ program from that console. Common """""" -GALLIUM_PRINT_OPTIONS (false) +.. 
envvar:: GALLIUM_PRINT_OPTIONS (false) This options controls if the debug variables should be printed to stderr. This is probably the most usefull variable since it allows you to find which variables a driver responds to. -GALLIUM_RBUG (false) +.. envvar:: GALLIUM_RBUG (false) Controls if the :ref:`rbug` should be used. -GALLIUM_TRACE ("") +.. envvar:: GALLIUM_TRACE ("") If not set tracing is not used, if set it will write the output to the file specifed by the variable. So setting it to "trace.xml" will write the output to the file "trace.xml". -GALLIUM_DUMP_CPU (false) +.. envvar:: GALLIUM_DUMP_CPU (false) Dump information about the current cpu that the driver is running on. -TGSI_PRINT_SANITY (false) +.. envvar:: TGSI_PRINT_SANITY (false) Gallium has a inbuilt shader sanity checker, this option controls if results from it should be printed. This include warnings such as unused variables. -DRAW_USE_LLVM (false) +.. envvar:: DRAW_USE_LLVM (false) Should the :ref:`draw` module use llvm for vertex and geometry shaders. -ST_DEBUG (0x0) +.. envvar:: ST_DEBUG (0x0) Debug :ref:`flags` for the GL state tracker. @@ -52,23 +52,23 @@ Debug :ref:`flags` for the GL state tracker. Driver specific """"""""""""""" -I915_DEBUG (0x0) +.. envvar:: I915_DEBUG (0x0) Debug :ref:`flags` for the i915 driver. -I915_NO_HW (false) +.. envvar:: I915_NO_HW (false) Stop the i915 driver from submitting commands to the hardware. -I915_DUMP_CMD (false) +.. envvar:: I915_DUMP_CMD (false) Dump all commands going to the hardware. -LP_DEBUG (0x0) +.. envvar:: LP_DEBUG (0x0) Debug :ref:`flags` for the llvmpipe driver. -LP_NUM_THREADS (num cpus) +.. envvar:: LP_NUM_THREADS (num cpus) Number of threads that the llvmpipe driver should use. 
diff --git a/src/gallium/docs/source/exts/formatting.py b/src/gallium/docs/source/exts/formatting.py new file mode 100644 index 0000000000..14865f3603 --- /dev/null +++ b/src/gallium/docs/source/exts/formatting.py @@ -0,0 +1,31 @@ +# formatting.py +# Sphinx extension providing formatting for Gallium-specific data +# (c) Corbin Simpson 2010 +# Public domain to the extent permitted; contact author for special licensing + +import docutils.nodes +import sphinx.addnodes + +def parse_envvar(env, sig, signode): + envvar, t, default = sig.split(" ", 2) + envvar = envvar.strip().upper() + t = " Type: %s" % t.strip(" <>").lower() + default = " Default: %s" % default.strip(" ()") + signode += sphinx.addnodes.desc_name(envvar, envvar) + signode += sphinx.addnodes.desc_type(t, t) + signode += sphinx.addnodes.desc_annotation(default, default) + return envvar + +def parse_opcode(env, sig, signode): + opcode, desc = sig.split("-", 1) + opcode = opcode.strip().upper() + desc = " (%s)" % desc.strip() + signode += sphinx.addnodes.desc_name(opcode, opcode) + signode += sphinx.addnodes.desc_annotation(desc, desc) + return opcode + +def setup(app): + app.add_description_unit("envvar", "envvar", "%s (environment variable)", + parse_envvar) + app.add_description_unit("opcode", "opcode", "%s (TGSI opcode)", + parse_opcode) diff --git a/src/gallium/docs/source/exts/tgsi.py b/src/gallium/docs/source/exts/tgsi.py deleted file mode 100644 index e92cd5c4d1..0000000000 --- a/src/gallium/docs/source/exts/tgsi.py +++ /dev/null @@ -1,17 +0,0 @@ -# tgsi.py -# Sphinx extension providing formatting for TGSI opcodes -# (c) Corbin Simpson 2010 - -import docutils.nodes -import sphinx.addnodes - -def parse_opcode(env, sig, signode): - opcode, desc = sig.split("-", 1) - opcode = opcode.strip().upper() - desc = " (%s)" % desc.strip() - signode += sphinx.addnodes.desc_name(opcode, opcode) - signode += sphinx.addnodes.desc_annotation(desc, desc) - return opcode - -def setup(app): - 
app.add_description_unit("opcode", "opcode", "%s (TGSI opcode)", parse_opcode) -- cgit v1.2.3 From 2d53dc873ea1d9e0e3e4c1cf08a63621661e422f Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 15 Aug 2010 03:26:58 -0700 Subject: gallium/docs: Cleanup debugging. Spelling, grammar, organization. --- src/gallium/docs/source/debugging.rst | 55 +++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 25 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/docs/source/debugging.rst b/src/gallium/docs/source/debugging.rst index f29bf53dfe..42bda5aee9 100644 --- a/src/gallium/docs/source/debugging.rst +++ b/src/gallium/docs/source/debugging.rst @@ -6,10 +6,10 @@ Debugging utilities in gallium. Debug Variables ^^^^^^^^^^^^^^^ -All drivers respond to a couple of debug enviromental variables. Below is -a collection of them. Set them as you would any normal enviromental variable -for the platform/operating system you are running. For linux this can be -done by typing "export var=value" into a console and then running the +All drivers respond to a set of common debug environment variables, as well as +some driver-specific variables. Set them as normal environment variables for +the platform or operating system you are running. For example, for Linux this +can be done by typing "export var=value" into a console and then running the program from that console. Common @@ -17,9 +17,9 @@ Common .. envvar:: GALLIUM_PRINT_OPTIONS (false) -This options controls if the debug variables should be printed to stderr. -This is probably the most usefull variable since it allows you to find -which variables a driver responds to. +This option controls if the debug variables should be printed to stderr. This +is probably the most useful variable, since it allows you to find which +variables a driver uses. .. envvar:: GALLIUM_RBUG (false) @@ -27,29 +27,34 @@ Controls if the :ref:`rbug` should be used. .. 
envvar:: GALLIUM_TRACE ("") -If not set tracing is not used, if set it will write the output to the file -specifed by the variable. So setting it to "trace.xml" will write the output -to the file "trace.xml". +If set, this variable will cause the :ref:`Trace` output to be written to the +specified file. Paths may be relative or absolute; relative paths are relative +to the working directory. For example, setting it to "trace.xml" will cause +the trace to be written to a file of the same name in the working directory. .. envvar:: GALLIUM_DUMP_CPU (false) -Dump information about the current cpu that the driver is running on. +Dump information about the current CPU that the driver is running on. .. envvar:: TGSI_PRINT_SANITY (false) -Gallium has a inbuilt shader sanity checker, this option controls if results -from it should be printed. This include warnings such as unused variables. +Gallium has a built-in shader sanity checker. This option controls whether +the shader sanity checker prints its warnings and errors to stderr. .. envvar:: DRAW_USE_LLVM (false) -Should the :ref:`draw` module use llvm for vertex and geometry shaders. +Whether the :ref:`Draw` module will attempt to use LLVM for vertex and geometry shaders. + + +State tracker-specific +"""""""""""""""""""""" .. envvar:: ST_DEBUG (0x0) Debug :ref:`flags` for the GL state tracker. -Driver specific +Driver-specific """"""""""""""" .. envvar:: I915_DEBUG (0x0) @@ -68,7 +73,7 @@ Dump all commands going to the hardware. Debug :ref:`flags` for the llvmpipe driver. -.. envvar:: LP_NUM_THREADS (num cpus) +.. envvar:: LP_NUM_THREADS (number of CPUs) Number of threads that the llvmpipe driver should use. @@ -78,10 +83,10 @@ Number of threads that the llvmpipe driver should use. Flags """"" -The variables of type all take a string with comma seperated -flags to enable different debugging for different parts of the drivers -or state tracker. 
If set to "help" the driver will print a list of flags -to which the variable can be set to. Order does not matter. +The variables of type "flags" all take a string with comma-separated flags to +enable different debugging for different parts of the drivers or state +tracker. If set to "help", the driver will print a list of flags which the +variable accepts. Order does not matter. .. _rbug: @@ -89,8 +94,8 @@ to which the variable can be set to. Order does not matter. Remote Debugger ^^^^^^^^^^^^^^^ -Or rbug for short allows for runtime inspections of :ref:`Context`, -:ref:`Screen`, Resources and Shaders; pauseing and stepping of draw calls; -and runtime disable and replacement of shaders. Is used with rbug-gui which -is hosted outside of the main mesa repositor. Rbug is can be used over a -network connection so the debbuger does not need to be on the same machine. +The remote debugger, commonly known as rbug, allows for runtime inspections of +:ref:`Context`, :ref:`Screen`, :ref:`Resource` and :ref:`Shader` objects; and +pausing and stepping of :ref:`Draw` calls. It is used with rbug-gui, which is +hosted outside of the main mesa repository. rbug can be used over a network +connection, so the debugger does not need to be on the same machine. -- cgit v1.2.3 From 510b03539413552a543e25de6b896eb10baf60ae Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 15 Aug 2010 16:21:46 +0100 Subject: llvmpipe: reorganize block4 loop, nice speedup isosurf 95->115 fps just by exchanging the two inner loops in this function...
--- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 15 +++++++++++++++ src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 16 ++++------------ 2 files changed, 19 insertions(+), 12 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index ebe9a8e92b..c1f2680ddc 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -113,6 +113,21 @@ block_full_16(struct lp_rasterizer_task *task, block_full_4(task, tri, x + ix, y + iy); } + +static INLINE unsigned +build_mask(int c, const int *step) +{ + int mask = 0; + int i; + + for (i = 0; i < 16; i++) { + mask |= ((c + step[i]) >> 31) & (1 << i); + } + + return mask; +} + + #define TAG(x) x##_1 #define NR_PLANES 1 #include "lp_rast_tri_tmp.h" diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index a410c611a3..fcb8e2b05d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -46,19 +46,11 @@ TAG(do_block_4)(struct lp_rasterizer_task *task, int x, int y, const int *c) { - unsigned mask = 0; - int i; + unsigned mask = 0xffff; + int j; - for (i = 0; i < 16; i++) { - int any_negative = 0; - int j; - - for (j = 0; j < NR_PLANES; j++) - any_negative |= (c[j] - 1 + plane[j].step[i]); - - any_negative >>= 31; - - mask |= (~any_negative) & (1 << i); + for (j = 0; j < NR_PLANES; j++) { + mask &= ~build_mask(c[j] - 1, plane[j].step); } /* Now pass to the shader: -- cgit v1.2.3 From 515194968d033d2c0c5678677f7606d38635d747 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 15 Aug 2010 16:32:45 +0100 Subject: llvmpipe: version of block4 which doesn't need the full step array No noticeable slowdown with isosurf.
--- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 30 ++++++++++++++++++++------ src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 4 +++- 2 files changed, 27 insertions(+), 7 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index c1f2680ddc..28a0446172 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -115,19 +115,37 @@ block_full_16(struct lp_rasterizer_task *task, static INLINE unsigned -build_mask(int c, const int *step) +build_mask(int c, int dcdx, int dcdy) { int mask = 0; - int i; - for (i = 0; i < 16; i++) { - mask |= ((c + step[i]) >> 31) & (1 << i); - } - + int c0 = c; + int c1 = c0 + dcdx; + int c2 = c1 + dcdx; + int c3 = c2 + dcdx; + + mask |= ((c0 + 0 * dcdy) >> 31) & (1 << 0); + mask |= ((c0 + 1 * dcdy) >> 31) & (1 << 2); + mask |= ((c0 + 2 * dcdy) >> 31) & (1 << 8); + mask |= ((c0 + 3 * dcdy) >> 31) & (1 << 10); + mask |= ((c1 + 0 * dcdy) >> 31) & (1 << 1); + mask |= ((c1 + 1 * dcdy) >> 31) & (1 << 3); + mask |= ((c1 + 2 * dcdy) >> 31) & (1 << 9); + mask |= ((c1 + 3 * dcdy) >> 31) & (1 << 11); + mask |= ((c2 + 0 * dcdy) >> 31) & (1 << 4); + mask |= ((c2 + 1 * dcdy) >> 31) & (1 << 6); + mask |= ((c2 + 2 * dcdy) >> 31) & (1 << 12); + mask |= ((c2 + 3 * dcdy) >> 31) & (1 << 14); + mask |= ((c3 + 0 * dcdy) >> 31) & (1 << 5); + mask |= ((c3 + 1 * dcdy) >> 31) & (1 << 7); + mask |= ((c3 + 2 * dcdy) >> 31) & (1 << 13); + mask |= ((c3 + 3 * dcdy) >> 31) & (1 << 15); + return mask; } + #define TAG(x) x##_1 #define NR_PLANES 1 #include "lp_rast_tri_tmp.h" diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index fcb8e2b05d..e14886f0b9 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -50,7 +50,9 @@ TAG(do_block_4)(struct lp_rasterizer_task *task, int j; for (j = 0; j < NR_PLANES; j++) { - mask &= 
~build_mask(c[j] - 1, plane[j].step); + mask &= ~build_mask(c[j] - 1, + plane[j].step[1], + plane[j].step[2]); } /* Now pass to the shader: -- cgit v1.2.3 From 4b322e71bb169af637864922edfb4108675781bb Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 15 Aug 2010 16:49:26 +0100 Subject: llvmpipe: also use build_mask at 16, 64 pixel levels --- src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 47 +++++++++++--------------- 1 file changed, 19 insertions(+), 28 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index e14886f0b9..6d0be76713 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -73,24 +73,19 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, const int *c) { unsigned outmask, inmask, partmask, partial_mask; - unsigned i, j; + unsigned j; outmask = 0; /* outside one or more trivial reject planes */ partmask = 0; /* outside one or more trivial accept planes */ for (j = 0; j < NR_PLANES; j++) { - const int *step = plane[j].step; - const int eo = plane[j].eo * 4; - const int ei = plane[j].ei * 4; - const int cox = c[j] + eo; - const int cio = ei - 1 - eo; - - for (i = 0; i < 16; i++) { - int out = cox + step[i] * 4; - int part = out + cio; - outmask |= (out >> 31) & (1 << i); - partmask |= (part >> 31) & (1 << i); - } + const int dcdx = plane[j].step[1] * 4; + const int dcdy = plane[j].step[2] * 4; + const int cox = c[j] + plane[j].eo * 4; + const int cio = c[j] + plane[j].ei * 4 - 1; + + outmask |= build_mask(cox, dcdx, dcdy); + partmask |= build_mask(cio, dcdx, dcdy); } if (outmask == 0xffff) @@ -151,7 +146,7 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, struct lp_rast_plane plane[NR_PLANES]; int c[NR_PLANES]; unsigned outmask, inmask, partmask, partial_mask; - unsigned i, j, nr_planes = 0; + unsigned j, nr_planes = 0; while (plane_mask) { int i = ffs(plane_mask) - 1; @@ -165,21 
+160,17 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, partmask = 0; /* outside one or more trivial accept planes */ for (j = 0; j < NR_PLANES; j++) { - const int *step = plane[j].step; - const int eo = plane[j].eo * 16; - const int ei = plane[j].ei * 16; - int cox, cio; - c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; - cox = c[j] + eo; - cio = ei - 1 - eo; - - for (i = 0; i < 16; i++) { - int out = cox + step[i] * 16; - int part = out + cio; - outmask |= (out >> 31) & (1 << i); - partmask |= (part >> 31) & (1 << i); - } + } + + for (j = 0; j < NR_PLANES; j++) { + const int dcdx = plane[j].step[1] * 16; + const int dcdy = plane[j].step[2] * 16; + const int cox = c[j] + plane[j].eo * 16; + const int cio = c[j] + plane[j].ei * 16 - 1; + + outmask |= build_mask(cox, dcdx, dcdy); + partmask |= build_mask(cio, dcdx, dcdy); } if (outmask == 0xffff) -- cgit v1.2.3 From ee0d1c29eeddfa364a18783507acd4d031029ba2 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 15 Aug 2010 17:22:25 +0100 Subject: llvmpipe: don't refer to plane->step when dcdx or dcdy would do --- src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index 6d0be76713..905f3df213 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -51,8 +51,8 @@ TAG(do_block_4)(struct lp_rasterizer_task *task, for (j = 0; j < NR_PLANES; j++) { mask &= ~build_mask(c[j] - 1, - plane[j].step[1], - plane[j].step[2]); + -plane[j].dcdx, + plane[j].dcdy); } /* Now pass to the shader: @@ -79,8 +79,8 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, partmask = 0; /* outside one or more trivial accept planes */ for (j = 0; j < NR_PLANES; j++) { - const int dcdx = plane[j].step[1] * 4; - const int dcdy = plane[j].step[2] * 4; + const int dcdx = 
-plane[j].dcdx * 4; + const int dcdy = plane[j].dcdy * 4; const int cox = c[j] + plane[j].eo * 4; const int cio = c[j] + plane[j].ei * 4 - 1; @@ -164,8 +164,8 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, } for (j = 0; j < NR_PLANES; j++) { - const int dcdx = plane[j].step[1] * 16; - const int dcdy = plane[j].step[2] * 16; + const int dcdx = -plane[j].dcdx * 16; + const int dcdy = plane[j].dcdy * 16; const int cox = c[j] + plane[j].eo * 16; const int cio = c[j] + plane[j].ei * 16 - 1; -- cgit v1.2.3 From 4c0641454b952f2c240de8c83511703f98e1f72f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 15 Aug 2010 17:24:54 +0100 Subject: llvmpipe: eliminate last usage of step array in rast_tmp.h For 16 and 64 pixel levels, calculate a mask which is linear in x and y (i.e. not in the swizzle layout). When iterating over full and partial masks, figure out position by manipulating the bit number set in the mask, rather than relying on position arrays. Similarly, calculate the lower-level c values from dcdx, dcdy and the position rather than relying on the step array.
--- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 29 +++++++++++++++++ src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 44 ++++++++++++++++---------- 2 files changed, 57 insertions(+), 16 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 28a0446172..8ecd0567df 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -144,6 +144,35 @@ build_mask(int c, int dcdx, int dcdy) return mask; } +static INLINE unsigned +build_mask_linear(int c, int dcdx, int dcdy) +{ + int mask = 0; + + int c0 = c; + int c1 = c0 + dcdy; + int c2 = c1 + dcdy; + int c3 = c2 + dcdy; + + mask |= ((c0 + 0 * dcdx) >> 31) & (1 << 0); + mask |= ((c0 + 1 * dcdx) >> 31) & (1 << 1); + mask |= ((c0 + 2 * dcdx) >> 31) & (1 << 2); + mask |= ((c0 + 3 * dcdx) >> 31) & (1 << 3); + mask |= ((c1 + 0 * dcdx) >> 31) & (1 << 4); + mask |= ((c1 + 1 * dcdx) >> 31) & (1 << 5); + mask |= ((c1 + 2 * dcdx) >> 31) & (1 << 6); + mask |= ((c1 + 3 * dcdx) >> 31) & (1 << 7); + mask |= ((c2 + 0 * dcdx) >> 31) & (1 << 8); + mask |= ((c2 + 1 * dcdx) >> 31) & (1 << 9); + mask |= ((c2 + 2 * dcdx) >> 31) & (1 << 10); + mask |= ((c2 + 3 * dcdx) >> 31) & (1 << 11); + mask |= ((c3 + 0 * dcdx) >> 31) & (1 << 12); + mask |= ((c3 + 1 * dcdx) >> 31) & (1 << 13); + mask |= ((c3 + 2 * dcdx) >> 31) & (1 << 14); + mask |= ((c3 + 3 * dcdx) >> 31) & (1 << 15); + + return mask; +} #define TAG(x) x##_1 diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index 905f3df213..98ebcfa870 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -84,8 +84,8 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, const int cox = c[j] + plane[j].eo * 4; const int cio = c[j] + plane[j].ei * 4 - 1; - outmask |= build_mask(cox, dcdx, dcdy); - partmask |= build_mask(cio, dcdx, dcdy); + outmask |= 
build_mask_linear(cox, dcdx, dcdy); + partmask |= build_mask_linear(cio, dcdx, dcdy); } if (outmask == 0xffff) @@ -106,15 +106,19 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, */ while (partial_mask) { int i = ffs(partial_mask) - 1; - int px = x + pos_table4[i][0]; - int py = y + pos_table4[i][1]; + int ix = (i & 3) * 4; + int iy = (i >> 2) * 4; + int px = x + ix; + int py = y + iy; int cx[NR_PLANES]; - for (j = 0; j < NR_PLANES; j++) - cx[j] = c[j] + plane[j].step[i] * 4; - partial_mask &= ~(1 << i); + for (j = 0; j < NR_PLANES; j++) + cx[j] = (c[j] + - plane[j].dcdx * ix + + plane[j].dcdy * iy); + TAG(do_block_4)(task, tri, plane, px, py, cx); } @@ -122,8 +126,10 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, */ while (inmask) { int i = ffs(inmask) - 1; - int px = x + pos_table4[i][0]; - int py = y + pos_table4[i][1]; + int ix = (i & 3) * 4; + int iy = (i >> 2) * 4; + int px = x + ix; + int py = y + iy; inmask &= ~(1 << i); @@ -169,8 +175,8 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, const int cox = c[j] + plane[j].eo * 16; const int cio = c[j] + plane[j].ei * 16 - 1; - outmask |= build_mask(cox, dcdx, dcdy); - partmask |= build_mask(cio, dcdx, dcdy); + outmask |= build_mask_linear(cox, dcdx, dcdy); + partmask |= build_mask_linear(cio, dcdx, dcdy); } if (outmask == 0xffff) @@ -191,12 +197,16 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, */ while (partial_mask) { int i = ffs(partial_mask) - 1; - int px = x + pos_table16[i][0]; - int py = y + pos_table16[i][1]; + int ix = (i & 3) * 16; + int iy = (i >> 2) * 16; + int px = x + ix; + int py = y + iy; int cx[NR_PLANES]; for (j = 0; j < NR_PLANES; j++) - cx[j] = c[j] + plane[j].step[i] * 16; + cx[j] = (c[j] + - plane[j].dcdx * ix + + plane[j].dcdy * iy); partial_mask &= ~(1 << i); @@ -208,8 +218,10 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, */ while (inmask) { int i = ffs(inmask) - 1; - int px = x + pos_table16[i][0]; - int py = y + pos_table16[i][1]; + int ix = (i 
& 3) * 16; + int iy = (i >> 2) * 16; + int px = x + ix; + int py = y + iy; inmask &= ~(1 << i); -- cgit v1.2.3 From ff26594a92df37608a3efe47e4d4f3a55bcd6bc1 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 15 Aug 2010 17:31:13 +0100 Subject: llvmpipe: remove all traces of step arrays, pos_tables No need to calculate these values any longer, nor to store them in the bin data. Improves isosurf a bit more, 115->123 fps. --- src/gallium/drivers/llvmpipe/lp_rast.h | 5 --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 46 --------------------- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 63 ----------------------------- 3 files changed, 114 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index eaf2a6f334..44319a0ad6 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -104,9 +104,6 @@ struct lp_rast_plane { int dcdx; int dcdy; - - /* edge/step info for 3 edges and 4x4 block of pixels */ - const int *step; }; /** @@ -119,8 +116,6 @@ struct lp_rast_triangle { /* inputs for the shader */ struct lp_rast_shader_inputs inputs; - int step[3][16]; - #ifdef DEBUG float v[3][2]; #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 8ecd0567df..980c18c024 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -37,52 +37,6 @@ #include "lp_tile_soa.h" -/** - * Map an index in [0,15] to an x,y position, multiplied by 4. - * This is used to get the position of each subtile in a 4x4 - * grid of edge step values. - * Note: we can use some bit twiddling to compute these values instead - * of using a look-up table, but there's no measurable performance - * difference. 
- */ -static const int pos_table4[16][2] = { - { 0, 0 }, - { 4, 0 }, - { 0, 4 }, - { 4, 4 }, - { 8, 0 }, - { 12, 0 }, - { 8, 4 }, - { 12, 4 }, - { 0, 8 }, - { 4, 8 }, - { 0, 12 }, - { 4, 12 }, - { 8, 8 }, - { 12, 8 }, - { 8, 12 }, - { 12, 12 } -}; - - -static const int pos_table16[16][2] = { - { 0, 0 }, - { 16, 0 }, - { 0, 16 }, - { 16, 16 }, - { 32, 0 }, - { 48, 0 }, - { 32, 16 }, - { 48, 16 }, - { 0, 32 }, - { 16, 32 }, - { 0, 48 }, - { 16, 48 }, - { 32, 32 }, - { 48, 32 }, - { 32, 48 }, - { 48, 48 } -}; /** diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 20e63ae51f..393533ebee 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -61,36 +61,6 @@ struct tri_info { -static const int step_scissor_minx[16] = { - 0, 1, 0, 1, - 2, 3, 2, 3, - 0, 1, 0, 1, - 2, 3, 2, 3 -}; - -static const int step_scissor_maxx[16] = { - 0, -1, 0, -1, - -2, -3, -2, -3, - 0, -1, 0, -1, - -2, -3, -2, -3 -}; - -static const int step_scissor_miny[16] = { - 0, 0, 1, 1, - 0, 0, 1, 1, - 2, 2, 3, 3, - 2, 2, 3, 3 -}; - -static const int step_scissor_maxy[16] = { - 0, 0, -1, -1, - 0, 0, -1, -1, - -2, -2, -3, -3, - -2, -2, -3, -3 -}; - - - static INLINE int subpixel_snap(float a) @@ -618,35 +588,6 @@ do_triangle_ccw(struct lp_setup_context *setup, /* Calculate trivial accept offsets from the above. */ plane->ei = plane->dcdy - plane->dcdx - plane->eo; - - plane->step = tri->step[i]; - - /* Fill in the inputs.step[][] arrays. - * We've manually unrolled some loops here. 
- */ -#define SETUP_STEP(j, x, y) \ - tri->step[i][j] = y * plane->dcdy - x * plane->dcdx - - SETUP_STEP(0, 0, 0); - SETUP_STEP(1, 1, 0); - SETUP_STEP(2, 0, 1); - SETUP_STEP(3, 1, 1); - - SETUP_STEP(4, 2, 0); - SETUP_STEP(5, 3, 0); - SETUP_STEP(6, 2, 1); - SETUP_STEP(7, 3, 1); - - SETUP_STEP(8, 0, 2); - SETUP_STEP(9, 1, 2); - SETUP_STEP(10, 0, 3); - SETUP_STEP(11, 1, 3); - - SETUP_STEP(12, 2, 2); - SETUP_STEP(13, 3, 2); - SETUP_STEP(14, 2, 3); - SETUP_STEP(15, 3, 3); -#undef STEP } @@ -669,28 +610,24 @@ do_triangle_ccw(struct lp_setup_context *setup, * these planes elsewhere. */ if (nr_planes == 7) { - tri->plane[3].step = step_scissor_minx; tri->plane[3].dcdx = -1; tri->plane[3].dcdy = 0; tri->plane[3].c = 1-minx; tri->plane[3].ei = 0; tri->plane[3].eo = 1; - tri->plane[4].step = step_scissor_maxx; tri->plane[4].dcdx = 1; tri->plane[4].dcdy = 0; tri->plane[4].c = maxx; tri->plane[4].ei = -1; tri->plane[4].eo = 0; - tri->plane[5].step = step_scissor_miny; tri->plane[5].dcdx = 0; tri->plane[5].dcdy = 1; tri->plane[5].c = 1-miny; tri->plane[5].ei = 0; tri->plane[5].eo = 1; - tri->plane[6].step = step_scissor_maxy; tri->plane[6].dcdx = 0; tri->plane[6].dcdy = -1; tri->plane[6].c = maxy; -- cgit v1.2.3 From 85d9bc236d6a8ff8f12cbc2150f8c3740354f573 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 15 Aug 2010 17:40:39 +0100 Subject: llvmpipe: consolidate several loops in lp_rast_triangle --- src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 33 ++++++++++++-------------- 1 file changed, 15 insertions(+), 18 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index 98ebcfa870..43f72d8ca8 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -152,31 +152,28 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, struct lp_rast_plane plane[NR_PLANES]; int c[NR_PLANES]; unsigned outmask, inmask, partmask, 
partial_mask; - unsigned j, nr_planes = 0; + unsigned j = 0; - while (plane_mask) { - int i = ffs(plane_mask) - 1; - plane[nr_planes] = tri->plane[i]; - plane_mask &= ~(1 << i); - nr_planes++; - }; - - assert(nr_planes == NR_PLANES); outmask = 0; /* outside one or more trivial reject planes */ partmask = 0; /* outside one or more trivial accept planes */ - for (j = 0; j < NR_PLANES; j++) { + while (plane_mask) { + int i = ffs(plane_mask) - 1; + plane[j] = tri->plane[i]; + plane_mask &= ~(1 << i); c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; - } - for (j = 0; j < NR_PLANES; j++) { - const int dcdx = -plane[j].dcdx * 16; - const int dcdy = plane[j].dcdy * 16; - const int cox = c[j] + plane[j].eo * 16; - const int cio = c[j] + plane[j].ei * 16 - 1; + { + const int dcdx = -plane[j].dcdx * 16; + const int dcdy = plane[j].dcdy * 16; + const int cox = c[j] + plane[j].eo * 16; + const int cio = c[j] + plane[j].ei * 16 - 1; - outmask |= build_mask_linear(cox, dcdx, dcdy); - partmask |= build_mask_linear(cio, dcdx, dcdy); + outmask |= build_mask_linear(cox, dcdx, dcdy); + partmask |= build_mask_linear(cio, dcdx, dcdy); + } + + j++; } if (outmask == 0xffff) -- cgit v1.2.3